记一次腾讯极客技术挑战-最小的输出自身MD5值的程序
2020-12-13 23:50:04 Author: mp.weixin.qq.com(查看原文) 阅读量:81 收藏

只允许64位的ELF可执行程序,总共有三个赛道,x86-64,arm64,mips64。不太熟悉汇编,三个赛道基本上都是用gcc来完成,辅助少量的汇编代码修改。

最终成绩

x86-64 472字节 第15名

arm64 816字节 第8名

mips63 1024字节 第7名

一开始有想过可否用碰撞实现,研究了几天后发现毫无头绪,就先用普通的方法尝试啦。

x86-64

主要几个点

  1. 不依赖任何库进行独立编译,对于程序 打印和 退出的操作,用内联汇编调用系统中断完成,计算md5算法中需要用到绝对值计算和sin函数,都diao y

  2. 直接在内存中读取自身,省掉了读取文件操作的步骤

  3. 计算md5的时候,根据md5算法的原理,以每64字节为一块进行计算,下一块的hash是根据上一块的hash算出的,我们可以直接算出第一块至倒数第二块的hash,硬编码写入,程序只用进行最后一次块的计算,节省体积。

  4. 对elf header处理,elf header有些标识位置可以利用

    1. 可以根据指令大小来做对应填充

  5. 对汇编指令的简单处理

    1. 先用gcc生成汇编文件,然后对它做一些简单的处理

    2. 一个简单处理的脚本

    3. sed-i"s/.section.*//g"my_3_amd.s sed-i"s/.ident.*//g"my_3_amd.s sed-i"s/pushq.*//g"my_3_amd.s sed-i"s/.size _start.*//g"my_3_amd.s sed-i"s/.align 4//g"my_3_amd.s sed-i"s/movl \$1, %edx/mov \$1, %dl/g"my_3_amd.s sed-i"s/.align 16//g"my_3_amd.s sed-i"s/movsbq %r11b, %r11//g"my_3_amd.s sed-i"s/movslq %ecx, %rcx//g"my_3_amd.s

  6. 手动处理

    1. 开头的 xorl%ecx,%ecx可删除,程序初始化时寄存器默认会为0

程序参考源码

  1. #define SYS_write 1

  2. #define SYS_exit 60

  3. #define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))

  4. static void write(char *d, int l)

  5. {

  6. __asm__ volatile("syscall"

  7. :

  8. : "a"(SYS_write), "D"(1), "S"((long)d), "d"(l)

  9. : "rcx", "r11", "memory");

  10. }

  11. static long double fsin_my(long double a)

  12. {

  13. long double res;

  14. asm __volatile__("fsin;\n\t"

  15. "fabs;\n\t"

  16. : "=t"(res)

  17. : "0"(a)

  18. : "memory");

  19. return res;

  20. }

  21. void _start(void)

  22. {

  23. unsigned char *msg = (unsigned char *)0x400000;

  24. const short initial_len =466; // 固定 需要填写

  25. const short new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8;

  26. short offset = new_len - (new_len % 64); // 最后一个块的位置 通过计算获得

  27. static unsigned int digest[4] = {1, 2, 3, 4}; // 需要填写

  28. msg[initial_len] = 0x80;

  29. *(long *)(msg + new_len) = initial_len << 3; //固定 需要填写 long 改成 char能节省4字节,未测试

  30. unsigned int *w = (unsigned int *)(msg + offset);

  31. unsigned int a = digest[0];

  32. unsigned int b = digest[1];

  33. unsigned int c = digest[2];

  34. unsigned int d = digest[3];

  35. for (int i= 0; i < 64; i++)

  36. {

  37. unsigned int f;

  38. int g;

  39. if (i < 16)

  40. {

  41. f = (b & c) | ((~b) & d);

  42. g = i;

  43. }

  44. else if (i < 32)

  45. {

  46. f = (d & b) | ((~d) & c);

  47. g = (5 * i + 1) % 16;

  48. }

  49. else if (i < 48)

  50. {

  51. f = b ^ c ^ d;

  52. g = (3 * i + 5) % 16;

  53. }

  54. else

  55. {

  56. f = c ^ (b | (~d));

  57. g = (7 * i) % 16;

  58. }

  59. long double t1 = fsin_my(i + 1);

  60. const unsigned long long t2 = (unsigned long long)1<<32;

  61. unsigned int k = (unsigned int)(t2 * t1);

  62. f += a + k + w[g];

  63. a = d;

  64. d = c;

  65. c = b;

  66. const char r[] = {7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21};

  67. b = b + LEFTROTATE(f, r[i / 16 * 4 + i % 4]);

  68. }

  69. digest[0] += a;

  70. digest[1] += b;

  71. digest[2] += c;

  72. digest[3] += d;

  73. unsigned char *md = (unsigned char *)&digest[0];

  74. char x[32];

  75. for (int i = 0; i < 32; i++)

  76. {

  77. int a = (md[i / 2] >> (4 * (1 - i % 2))) & 0xF;

  78. char c = a >= 10 ? a + ('a' - 10) : a + '0';

  79. x[i] = c;

  80. }

  81. write(&x, 32);

  82. __asm__ volatile("syscall"

  83. :

  84. : "a"(SYS_exit), "D"(0)

  85. : "rcx", "r11", "memory");

  86. }

编译流程

  1. 生成汇编代码

    CFLAGS="-static -Os -Wl,--omagic -ffunction-sections -Wl,--gc-sections -nostartfiles -nodefaultlibs -nostdlib -nostdinc -fomit-frame-pointer -fno-stack-protector -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops -fmerge-all-constants -fno-ident -finline-functions-called-once -I ./ -fdata-sections"gcc $CFLAGS my3amd.c -S -mavx -msse -mavx2 -ffast-math # -fverbose-asm./change_asm.sh # 自动优化汇编代码那部分
  2. 手动优化汇编代码

  3. 汇编生成二进制

      CFLAGS="-static -Os -Wl,--omagic -ffunction-sections -Wl,--gc-sections -nostartfiles -nodefaultlibs -nostdlib -nostdinc -Wl,--build-id=none -fomit-frame-pointer -fno-stack-protector -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops -fmerge-all-constants -fno-ident  -finline-functions-called-once -I ./ "gcc   $CFLAGS  my_3_amd.s  -o test_amd64objdump -S test_amd64strip -s test_amd64# strip 减少 1088-704=384
      ../../sstrip test_amd64# sstri 减少 704-492=212
      ls -l|grep test./test_amd64echomd5sum test_amd64
  1. 改造elf

    1. rm selfmd5-test../../elftoc-E test_amd64>selfmd5.h mv selfmd5.h../../g++-g-o../../trim-elf../../trim-src.c&&../../trim-elf ls-l|grepselfchmod777selfmd5-test./selfmd5-test echo md5sum selfmd5-test

  2. trim-src.c

      #include "selfmd5.h"#include <fcntl.h>#include <unistd.h>#include <stdlib.h>#include <stdio.h>#include <string.h>
      //typedef struct {// unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */// Elf64_Half e_type; /* Object file type */// Elf64_Half e_machine; /* Architecture */// Elf64_Word e_version; /* Object file version */// Elf64_Addr e_entry; /* Entry point virtual address */// Elf64_Off e_phoff; /* Program header table file offset */// Elf64_Off e_shoff; /* Section header table file offset */// Elf64_Word e_flags; /* Processor-specific flags */// Elf64_Half e_ehsize; /* ELF header size in bytes */// Elf64_Half e_phentsize; /* Program header table entry size */// Elf64_Half e_phnum; /* Program header table entry count */// Elf64_Half e_shentsize; /* Section header table entry size */// Elf64_Half e_shnum; /* Section header table entry count */// Elf64_Half e_shstrndx; /* Section header string table index *///} Elf64_Ehdr;
      //typedef struct//{// Elf64_Word p_type; /* Segment type */// Elf64_Word p_flags; /* Segment flags */// Elf64_Off p_offset; /* Segment file offset */// Elf64_Addr p_vaddr; /* Segment virtual address */// Elf64_Addr p_paddr; /* Segment physical address */// Elf64_Xword p_filesz; /* Segment size in file */// Elf64_Xword p_memsz; /* Segment size in memory */// Elf64_Xword p_align; /* Segment alignment *///} Elf64_Phdr;
      int main(int argc, char *argv[]){
      size_t size = sizeof(foo); // - sizeof(foo._end);
      printf("foo size:%d\n",size);
      for (int i = 8; i < 16; ++i) { foo.ehdr.e_ident[i] = 0xFF; // 10可修改,留2位跳转 } foo.ehdr.e_version = 0xAAAAAAAA; // 2 可修改,留2位跳转 foo.ehdr.e_shoff = 0xFFFFFFFFFFFFFFFF; // 6可修改,留2跳转 foo.ehdr.e_flags = 0xCCCCCCCC; //2 可修改,留2跳转
      // long code_text = offsetof(elf, text) + ADDR_TEXT; // printf("代码段偏移位置 offset %d,%d\n", offsetof(elf, text), code_text);
      // foo.ehdr.e_entry = code_text; // foo.ehdr.e_ident[8] = 0xEB; // foo.ehdr.e_ident[9] = offsetof(elf, text) - 10; // *(long *)(foo.ehdr.e_ident + 9) =code_text ; // foo.ehdr.e_ident[9] = offsetof(elf, text); // foo.ehdr.e_ident[10] = code_text<<4; // printf("new entry %d\n", entry);
      foo.ehdr.e_entry = ADDR_TEXT + 8;
      // copy 6 bytes foo.ehdr.e_ident[8] = foo.text[0]; foo.ehdr.e_ident[9] = foo.text[1]; foo.ehdr.e_ident[10] = foo.text[2]; foo.ehdr.e_ident[11] = foo.text[3]; foo.ehdr.e_ident[12] = foo.text[4]; foo.ehdr.e_ident[13] = foo.text[5]; foo.ehdr.e_ident[14] = 0xEB; foo.ehdr.e_ident[15] = 24;
      // foo.ehdr.e_ident[16] = 0xEB; // foo.ehdr.e_ident[13] = 106;//26
      printf("jmp %d\n", foo.ehdr.e_ident[15]);
      for (int i = 0; i < sizeof(foo.text) - 6; ++i) { foo.text[i] = foo.text[i + 6]; } size -= 6;
      // copy 8 bytes ((char *)(&foo.ehdr.e_shoff))[0] = foo.text[0]; ((char *)(&foo.ehdr.e_shoff))[1] = foo.text[1]; ((char *)(&foo.ehdr.e_shoff))[2] = foo.text[2]; ((char *)(&foo.ehdr.e_shoff))[3] = foo.text[3]; ((char *)(&foo.ehdr.e_shoff))[4] = foo.text[4]; ((char *)(&foo.ehdr.e_shoff))[5] = foo.text[5]; ((char *)(&foo.ehdr.e_shoff))[6] = foo.text[6]; ((char *)(&foo.ehdr.e_shoff))[7] = foo.text[7]; ((char *)(&foo.ehdr.e_flags))[0] = 0xEB; ((char *)(&foo.ehdr.e_flags))[1] = 70; // ((char *)(&foo.ehdr.e_flags))[2] = 70;
      // ((char *)(&foo.ehdr.e_shoff))[7] = 0xEB; // ((char *)(&foo.ehdr.e_flags))[0] = 71;
      for (int i = 0; i < sizeof(foo.text) - 8; ++i) { foo.text[i] = foo.text[i + 8]; } size -= 8;


      // output FILE *fd = fopen("selfmd5-test", "wb"); size_t n = fwrite(&foo, size, 1, fd); printf("最终大小:%d\n", size); return 0;}

有些部分参考了知乎上的一篇文章,有修改

arm64

arm64和上面有些许不一样

  1. 没有省掉md5最后一轮的运算,因为发现省不省大小一样。没有从汇编层面去研究原因 - =。

  2. md5中的k值是硬编码上去的,发现用c语言实现一个sin函数体积很大,没有找到优化的方法

代码

  1. #define SYS_write 0x40 //64

  2. #define SYS_exit 0x5d // 93

  3. static void write(const void *buf, long size)

  4. {

  5. register long x8 __asm__("x8") = SYS_write;

  6. register long x0 __asm__("x0") = 1;

  7. register long x1 __asm__("x1") = (long)buf;

  8. register long x2 __asm__("x2") = size;

  9. __asm__ __volatile__(

  10. "svc 0"

  11. :

  12. : "r"(x8), "r"(x0), "r"(x1), "r"(x2)

  13. : "memory", "cc");

  14. }

  15. static void _exit()

  16. {

  17. register long x8 __asm__("x8") = SYS_exit;

  18. register long x0 __asm__("x0") = 0;

  19. __asm__ __volatile__(

  20. "svc 0"

  21. :

  22. : "r"(x8), "r"(x0)

  23. : "memory", "cc");

  24. }

  25. // leftrotate function definition

  26. #define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))

  27. void _start(void)

  28. {

  29. const short initial_len = 824;

  30. unsigned char *msg = (unsigned char *)0x400000;

  31. static const char r[] = { 7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21};

  32. static const unsigned int k[64] = {

  33. 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,

  34. 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,

  35. 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,

  36. 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,

  37. 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,

  38. 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,

  39. 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,

  40. 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,

  41. 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,

  42. 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,

  43. 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,

  44. 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,

  45. 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,

  46. 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,

  47. 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,

  48. 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391

  49. };

  50. unsigned int a, b, c, d;

  51. unsigned int h0, h1, h2, h3;

  52. // Initialize variables - simple count in nibbles:

  53. h0 = 0x67452301;

  54. h1 = 0xefcdab89;

  55. h2 = 0x98badcfe;

  56. h3 = 0x10325476;

  57. const int new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8;

  58. msg[initial_len] = 0x80;

  59. *(unsigned long long *)(msg + new_len) = initial_len << 3;

  60. for (int offset = 0; offset < new_len; offset += 64)

  61. {

  62. unsigned int *w = (unsigned int *)(msg + offset);

  63. // Initialize hash value for this chunk:

  64. a = h0;

  65. b = h1;

  66. c = h2;

  67. d = h3;

  68. // Main loop:

  69. for (int i = 0; i < 64; i++)

  70. {

  71. unsigned int f;

  72. int g;

  73. if (i < 16)

  74. {

  75. f = (b & c) | ((~b) & d);

  76. g = i;

  77. }

  78. else if (i < 32)

  79. {

  80. f = (d & b) | ((~d) & c);

  81. g = (5 * i + 1) % 16;

  82. }

  83. else if (i < 48)

  84. {

  85. f = b ^ c ^ d;

  86. g = (3 * i + 5) % 16;

  87. }

  88. else

  89. {

  90. f = c ^ (b | (~d));

  91. g = (7 * i) % 16;

  92. }

  93. f += a + k[i] + w[g];

  94. a = d;

  95. d = c;

  96. c = b;

  97. b = b + LEFTROTATE(f, r[i / 16 * 4 + i % 4]);

  98. }

  99. h0 += a;

  100. h1 += b;

  101. h2 += c;

  102. h3 += d;

  103. }

  104. unsigned int digest[4] = {h0, h1, h2, h3};

  105. unsigned char *md = (unsigned char *)&digest;

  106. char buf[32];

  107. for (int i = 0; i < 32; i++)

  108. {

  109. char a = (md[i / 2] >> (4 * (1 - i % 2))) & 0xF;

  110. char c = a >= 10 ? a + ('a' - 10) : a + '0';

  111. buf[i] = c;

  112. }

  113. write(&buf, 32);

  114. _exit();

  115. }

编译

  1. 生成汇编代码

      CFLAGS="-static -Os -Wl,--omagic -ffunction-sections -Wl,--gc-sections -nostartfiles -nodefaultlibs -nostdlib -nostdinc -fomit-frame-pointer -fno-stack-protector -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops -fmerge-all-constants -fno-ident -finline-functions-called-once -I ./ -fdata-sections"gcc  $CFLAGS my_3_arm.c -S -ffast-math -fverbose-asm # -fverbose-asm
    1. 对arm的汇编不太熟悉,就删掉了exit后面的,可以优化两字节

  2. 汇编代码生成二进制

      CFLAGS="-static -Os -Wl,--omagic -ffunction-sections -Wl,--gc-sections -nostartfiles -nodefaultlibs -nostdlib -nostdinc -Wl,--build-id=none -fomit-frame-pointer -fno-stack-protector -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops -fmerge-all-constants -fno-ident  -finline-functions-called-once -I ./ "gcc   $CFLAGS  my_3_arm.s  -o test_arm64objdump -S test_arm64strip -s test_arm64ls -l|grep test_arm

mips64

mips64同arm64代码差不多,优化一下就可以直接用了。同样是sin函数无法实现,k值硬编码进去,占用了好多字节。

  1. #define SYS_exit 5058

  2. #define SYS_write 5001

  3. static void _exit()

  4. {

  5. register unsigned long __a0 asm("$4") = 0;

  6. __asm__ __volatile__(

  7. " li $2, %0 \n"

  8. " syscall \n"

  9. " .set pop "

  10. :

  11. : "i"(SYS_exit), "r"(__a0)

  12. : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24",

  13. "memory");

  14. }

  15. static inline int _write(char *buf, int len)

  16. {

  17. register unsigned long __a0 asm("$4") = (unsigned long)1;

  18. register unsigned long __a1 asm("$5") = (unsigned long)buf;

  19. register unsigned long __a2 asm("$6") = (unsigned long)len;

  20. register unsigned long __a3 asm("$7");

  21. unsigned long __v0;

  22. __asm__ __volatile__(

  23. " li $2, %2 \n"

  24. " syscall \n"

  25. " move %0, $2 \n"

  26. : "=r"(__v0), "=r"(__a3)

  27. : "i"(SYS_write), "r"(__a0), "r"(__a1), "r"(__a2)

  28. : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24",

  29. "memory");

  30. }

  31. #define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))

  32. void __start(void)

  33. {

  34. const short initial_len = 1056;

  35. unsigned char *msg = (unsigned char *)0x120000000;

  36. static const char r[] = {7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21};

  37. static const unsigned int k[64] = {

  38. 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,

  39. 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,

  40. 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,

  41. 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,

  42. 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,

  43. 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,

  44. 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,

  45. 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,

  46. 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,

  47. 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,

  48. 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,

  49. 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,

  50. 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,

  51. 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,

  52. 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,

  53. 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};

  54. unsigned int a, b, c, d;

  55. // Initialize variables - simple count in nibbles:

  56. // h0 = 0x67452301;

  57. // h1 = 0xefcdab89;

  58. // h2 = 0x98badcfe;

  59. // h3 = 0x10325476;

  60. static unsigned int hash[4] = {1,2,3,4};

  61. const int new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8;

  62. msg[initial_len] = 0x80;

  63. *(unsigned long long *)(msg + new_len) = initial_len << 3;

  64. int offset = 1233;

  65. unsigned int *w = (unsigned int *)(msg + offset);

  66. // Initialize hash value for this chunk:

  67. a = hash[0];

  68. b = hash[1];

  69. c = hash[2];

  70. d = hash[3];

  71. // Main loop:

  72. for (int i = 0; i < 64; i++)

  73. {

  74. unsigned int f;

  75. int g;

  76. if (i < 16)

  77. {

  78. f = (b & c) | ((~b) & d);

  79. g = i;

  80. }

  81. else if (i < 32)

  82. {

  83. f = (d & b) | ((~d) & c);

  84. g = (5 * i + 1) % 16;

  85. }

  86. else if (i < 48)

  87. {

  88. f = b ^ c ^ d;

  89. g = (3 * i + 5) % 16;

  90. }

  91. else

  92. {

  93. f = c ^ (b | (~d));

  94. g = (7 * i) % 16;

  95. }

  96. f += a + k[i] + w[g];

  97. a = d;

  98. d = c;

  99. c = b;

  100. b = b + LEFTROTATE(f, r[i / 16 * 4 + i % 4]);

  101. }

  102. hash[0] += a;

  103. hash[1] += b;

  104. hash[2] += c;

  105. hash[3] += d;

  106. // unsigned int digest[] = {h0, h1, h2, h3};

  107. unsigned char *md = (unsigned char *)&hash;

  108. for (int i = 0; i < 32; i++)

  109. {

  110. char a = (md[i / 2] >> (4 * (1 - i % 2))) & 0xF;

  111. char c = a >= 10 ? a + ('a' - 10) : a + '0';

  112. _write(&c, 1);

  113. }

  114. _exit();

  115. }

编译

    CFLAGS="-static -Os -Wl,--omagic -ffunction-sections -ffast-math -Wl,--build-id=none -Wl,--gc-sections -nostartfiles -nodefaultlibs -nostdlib -nostdinc -fomit-frame-pointer -fno-stack-protector -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops -fmerge-all-constants -fno-ident -finline-functions-called-once -I ./ -fdata-sections"
    mips64el-linux-gnuabi64-gcc -mno-abicalls $CFLAGS my_3_mips.c -S -fverbose-asm # -fverbose-asm
    CFLAGS="-static -Os -Wl,--omagic -ffunction-sections -ffast-math -Wl,--build-id=none -Wl,--gc-sections -nostartfiles -nodefaultlibs -nostdlib -nostdinc -fomit-frame-pointer -fno-stack-protector -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops -fmerge-all-constants -fno-ident -finline-functions-called-once -I ./ -fdata-sections"
    mips64el-linux-gnuabi64-gcc $CFLAGS -mno-abicalls my_3_mips.s -o test_mipsmips64el-linux-gnuabi64-objdump -S test_mips
    ls -l |grep test_mips./test_mipsecho md5sum test_mips

复盘

对于x86-64的程序,感觉用c语言是已经优化到了极限,在接下来就是对汇编进行优化,将那些占用空间的寄存器统统替换。

对于arm64和mips64,就是sin函数的最优化写法的问题了。

这些是我想到的最后再优化的解决方法了,不过因为时间原因,还都未能实现。

比赛结束后,群里面各路大神放出了自己的思路,让我深知,自己的差距并不在那里。。很多大佬的奇淫巧技让人目瞪狗呆。。对于汇编代码的优化也是让人望尘莫及。。

详情见:https://mp.weixin.qq.com/s/zQu8YPwKZevBu8zx7jMzYg

有些方法和我的类似,说说和我不一样的做法。。

  • 碰撞最后iv为0 (来自第一名思路)

    • 前面说过可以提前计算md5第(n-1)个块,最后结果会有4个数值,利用elf中的缝隙字节做碰撞,将最后一个iv爆破为0,可以节省代码占用的4个字节。

    • eg:

      $ nasm ./zero_res.asm && chmod +x ./zero_res && ./zero_res && echo # 编译运行,最后在echo一个回车96935fafb893d3325b244bfe2ca98908$ ./md5 ./zero_res # 这个程序用来计算目标文件n-1个block后的结果,并输出完整程序的md5值0xc718942d, 0x27848216, 0x26f99fc3, 0x0000000096935fafb893d3325b244bfe2ca98908
  • 碰撞md5纯数字输出(来自出题人思路)

    • 也是利用缝隙字节做碰撞,让程序输出全为数字,即省去了处理输出字母的代码。

  • sin的计算

  • 使用泰勒展开式

    迭代到17就可满足精度,但是我用泰勒公式写出来的指令较多


    因为不需要计算任意角,只需要计算1-64的整数,所以最后采用了两角和公式:

    sin1、cos1作为常数,循环依次加到64即可

      s[0] = 0.8414709848078965;  c[0] = 0.5403023058681398;  for (i=1;i<64;i++)  {    s[i] = s[i-1] * c[0] + c[i-1] * s[0];    c[i] = c[i-1] * c[0] - s[i-1] * s[0];  }

    另一个实现思路

    void cal_sin(int n) {          // 角度转换       n = n * (3.142 / 180.0);          // 等差数列      float x1 = n;       float sinx = n;                    int i = 1;       do      {             x1 = (-x1 * n * n) / (2 * i * (2 * i + 1));             sinx = sinx + x1;       } while (++i<6);       return sinx;} 

最后

很感谢出题人的提供的比赛机会,这次比赛感觉到收获满满也很快乐~比赛途中好几次优化后,排名直冲第一,高兴没几个小时,就被别人比下去了,然后我又继续优化几个字节,再把他给比下去,哈哈哈哈,一来一往好几个回合,最后汇编大佬们都纷纷出场,然后我就再也没有机会了。。哭)


文章来源: http://mp.weixin.qq.com/s?__biz=MzU2NzcwNTY3Mg==&mid=2247483874&idx=1&sn=ef062f88be92ca21e6b68cc68de62d5a&chksm=fc9868c5cbefe1d39af49bf125766e294aa58d1b0d0631ab229f3fd88693ab1ffe6c8f87e37f#rd
如有侵权请联系:admin#unsafe.sh