.code ; the beginning of the code
 ; section
WinMainCRTStartup proc h:DWORD, r:DWORD, u:DWORD
;-----------------------------------------------------------------------
; DLL entry-point stub.
; In:    h, r, u  - declared but never read by this stub
; Out:   rax = 1  (a zero result would stop the DLL from starting)
;-----------------------------------------------------------------------
        mov     eax, 1                  ; 32-bit write zero-extends, so rax = 1
        ret
WinMainCRTStartup Endp
_DllMainCRTStartup proc h:DWORD, r:DWORD, u:DWORD
;-----------------------------------------------------------------------
; Alternate DLL entry-point stub; does no per-process or per-thread work.
; In:    h, r, u  - declared but never read by this stub
; Out:   rax = 1  (a zero result would stop the DLL from starting)
;-----------------------------------------------------------------------
        mov     eax, 1                  ; 32-bit write zero-extends, so rax = 1
        ret
_DllMainCRTStartup Endp
SpeckEncrypt proc plaintText:QWORD, cipherText:QWORD, Key:QWORD
;-----------------------------------------------------------------------
; void SpeckEncrypt(const void *plainText, void *cipherText, const void *Key)
;
; Encrypts one 128-bit block with a 128-bit key (Speck, 32 rounds). The
; round keys are generated on the fly, one per round.
;
; In:    rcx = address of plainText  (two qwords: [0] and [1])
;        rdx = address of cipherText (two qwords, written on exit)
;        r8  = address of Key        (two qwords: [0] and [1])
; Out:   [rdx], [rdx+8] = encrypted block; plainText and Key are not written
; Clobb: rcx, r11, flags (r12-r14 are saved and restored)
;
; Register roles inside the round loop:
;        r11 = block word 0      r12 = block word 1
;        r13 = current round key r14 = key-schedule word
;        rcx = round counter (counts up because the schedule XORs it in)
;
; Words are loaded as native 64-bit values, so the caller decides the byte
; order of each word by how it lays the bytes out in memory.
;-----------------------------------------------------------------------
        push    rbp
        mov     rbp, rsp
        push    r12
        push    r13
        push    r14

        mov     r11, [rcx]              ; block word 0 = plainText[0]
        mov     r12, [rcx+8]            ; block word 1 = plainText[1]
        mov     r13, [r8]               ; Key[0] doubles as the round-0 key
        mov     r14, [r8+8]             ; Key[1] feeds the key schedule

        xor     ecx, ecx                ; round = 0; the pointer in rcx is no longer needed

EncryptRoundFunction:
        ; Mix the block with the current round key.
        ror     r12, 8
        add     r12, r11
        xor     r12, r13
        rol     r11, 3
        xor     r11, r12

        ; Derive the next round key with the same transform, using the
        ; round number where the block round uses the key.
        ror     r14, 8
        add     r14, r13
        xor     r14, rcx
        rol     r13, 3
        xor     r13, r14

        inc     rcx
        cmp     rcx, 32                 ; 32 rounds in total
        jne     EncryptRoundFunction

        mov     [rdx], r11              ; cipherText[0]
        mov     [rdx+8], r12            ; cipherText[1]

        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        ret
SpeckEncrypt endp
SpeckDecrypt proc cipherText:QWORD, plainText:QWORD, Key:QWORD
;-----------------------------------------------------------------------
; void SpeckDecrypt(const void *cipherText, void *plainText, const void *Key)
;
; Decrypts one 128-bit block with a 128-bit key (Speck, 32 rounds).
;
; Decryption consumes the round keys in reverse order, so the routine
; first runs the key schedule forward and pushes every round key on the
; stack, then pops them back (last key first) while undoing the rounds.
;
; In:    rcx = address of cipherText (two qwords: [0] and [1])
;        rdx = address of plainText  (two qwords, written on exit)
;        r8  = address of Key        (two qwords: [0] and [1])
; Out:   [rdx], [rdx+8] = decrypted block; cipherText and Key are not written
; Clobb: rcx, r11, flags (r12-r14 are saved and restored)
; Stack: 32 * 8 = 256 bytes of temporary round-key storage
;
; Register roles:
;        r11 = block word 0      r12 = block word 1
;        r13 = current round key r14 = key-schedule word
;        rcx = round counter
;-----------------------------------------------------------------------
        push    rbp
        mov     rbp, rsp
        push    r12
        push    r13
        push    r14

        mov     r11, [rcx]              ; block word 0 = cipherText[0]
        mov     r12, [rcx+8]            ; block word 1 = cipherText[1]
        mov     r13, [r8]               ; Key[0] doubles as the round-0 key
        mov     r14, [r8+8]             ; Key[1] feeds the key schedule

        xor     ecx, ecx                ; round = 0; the pointer in rcx is no longer needed

        ; Pass 1: push round keys 0..31. The key is pushed BEFORE it is
        ; advanced so the raw Key[0] is stored too; it is needed to undo
        ; round 0. r14 is only used by the schedule and is never pushed.
DecryptMakeRoundKeys:
        push    r13
        ror     r14, 8
        add     r14, r13
        xor     r14, rcx
        rol     r13, 3
        xor     r13, r14
        inc     rcx
        cmp     rcx, 32
        jne     DecryptMakeRoundKeys

        ; Pass 2: undo the rounds, popping keys 31..0. Exactly 32 pops
        ; balance the 32 pushes above.
        mov     ecx, 32
DecryptRoundFunction:
        pop     r13
        xor     r11, r12
        ror     r11, 3
        xor     r12, r13
        sub     r12, r11
        rol     r12, 8
        dec     rcx
        jnz     DecryptRoundFunction

        mov     [rdx], r11              ; plainText[0]
        mov     [rdx+8], r12            ; plainText[1]

        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        ret
SpeckDecrypt endp
End ; end of the dll


using System;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Windows.Forms;
namespace SpeckDLLTest
    // WinForms test harness for the native SpeckEncrypt / SpeckDecrypt routines.
    // NOTE(review): this listing has no braces, no `unsafe` blocks and no attributes;
    // the nesting described in the comments below is inferred from indentation only - confirm
    // against the real source file.
    public partial class Form1 : Form
        // 16-byte key passed to the native routines by Encrypt() and Decrypt().
        byte[] key = { 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 };
        // NOTE(review): the constructor body is not visible in this listing.
        public Form1()
        // Shows the current input length in textBox1, then sets up one worker that encrypts
        // the text (shown as Base64 in richTextBox2) and one that decrypts it again (shown as
        // ASCII in richTextBox3).
        private void richTextBox1_TextChanged(object sender, EventArgs e)
            textBox1.Text = richTextBox1.Text.Length.ToString();
            if (richTextBox1.Text != "")
                byte[] plainText = ASCIIEncoding.ASCII.GetBytes(richTextBox1.Text);
                byte[] cipherText = new byte[plainText.Length];
                // Worker 1: encrypt, then marshal the Base64 result back to the UI via BeginInvoke.
                // NOTE(review): no Start() call on this Thread is visible here - confirm.
                Thread t = new Thread(() =>
                    cipherText = Encrypt(plainText);
                    BeginInvoke(new Action(() => richTextBox2.Text = Convert.ToBase64String(cipherText)));

                // Sized from cipherText as it is at this point, i.e. the unpadded placeholder
                // allocated above unless worker 1 has already replaced it.
                byte[] plainAgain = new byte[cipherText.Length];
                // Worker 2: decrypt whatever cipherText refers to when the lambda runs.
                // NOTE(review): it reads the variable worker 1 assigns; no Start()/Join() ordering
                // between the two workers is visible here - confirm.
                t = new Thread(() =>
                        plainAgain = Decrypt(cipherText);
                        BeginInvoke(new Action(() => richTextBox3.Text = ASCIIEncoding.ASCII.GetString(plainAgain)));
                // NOTE(review): presumably the empty-input (else) branch that clears both output
                // boxes; no `else` is visible in this listing - confirm.
                richTextBox2.Text = "";
                richTextBox3.Text = "";
        // Decrypts cipherText in 16-byte blocks with the class-level key and returns the result.
        // The returned array has the padded length (a multiple of 16).
        private byte[] Decrypt(byte[] cipherText)
            // Round the block count up so a trailing partial block is still processed.
            int blockCount = cipherText.Length / 16;
            if (cipherText.Length % 16 != 0) blockCount++;
            // Grow the local copy to a whole number of blocks.
            Array.Resize(ref cipherText, blockCount * 16);
            byte[] plainText = new byte[cipherText.Length];
                // Pin all three arrays so raw pointers can be handed to the native routine.
                fixed (byte* plaintextPointer = plainText, ciphertextPointer = cipherText, keyPointer = key)
                    for (int i = 0; i < blockCount; i++)
                        // This inner loop runs exactly once per block (j < 1).
                        for (int j = 0; j < 1; j++)
                            // Block i starts i * 16 bytes into each buffer.
                            UnsafeMethods.SpeckDecrypt(ciphertextPointer + i * 16, plaintextPointer + i * 16, keyPointer);
            return plainText;
        // Encrypts plainText in 16-byte blocks with the class-level key and returns the result.
        // The returned array has the padded length (a multiple of 16).
        private byte[] Encrypt(byte[] plainText)
            // Round the block count up so a trailing partial block is still processed.
            int blockCount = plainText.Length / 16;
            if (plainText.Length % 16 != 0) blockCount++;
            // Grow the local copy to a whole number of blocks.
            Array.Resize(ref plainText, blockCount * 16);
            byte[] cipherText = new byte[plainText.Length];
                // Pin all three arrays so raw pointers can be handed to the native routine.
                fixed (byte* plaintextPointer = plainText, ciphertextPointer = cipherText, keyPointer = key)
                    for (int i = 0; i < blockCount; i++)
                        // This inner loop runs exactly once per block (j < 1).
                        for (int j = 0; j < 1; j++)
                            // Block i starts i * 16 bytes into each buffer.
                            UnsafeMethods.SpeckEncrypt(plaintextPointer + i * 16, ciphertextPointer + i * 16, keyPointer);
            return cipherText;
        // Self-test: encrypts one fixed 16-byte block with a fixed key and compares the output
        // byte-for-byte with testVector, reporting "Passed!" or "Failed!" in a message box.
        private void button1_Click(object sender, EventArgs e)
            byte[] plainText = { 0x6c, 0x61, 0x76, 0x69, 0x75, 0x71, 0x65, 0x20, 0x74, 0x69, 0x20, 0x65, 0x64, 0x61, 0x6d, 0x20 };
            // Local key; shadows the class-level field of the same name (same byte values).
            byte[] key = { 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 };
            // Expected ciphertext bytes.
            // NOTE(review): the native routine loads each 8-byte half as a native 64-bit word,
            // so whether these byte orders match what it expects should be confirmed.
            byte[] testVector = { 0xa6, 0x5d, 0x98, 0x51, 0x79, 0x78, 0x32, 0x65, 0x78, 0x60, 0xfe, 0xdf, 0x5c, 0x57, 0x0d, 0x18 };
            byte[] cipherText = new byte[16];
                fixed (byte* plaintextPointer = plainText, ciphertextPointer = cipherText, keyPointer = key)
                    UnsafeMethods.SpeckEncrypt(plaintextPointer, ciphertextPointer, keyPointer);
                    // Any single mismatching byte fails the test.
                    bool testBool = true;
                    for (int i = 0; i < cipherText.Length; i++)
                        if (testVector[i] != cipherText[i]) testBool = false;
                    if (testBool == false) MessageBox.Show("Failed!");
                    else MessageBox.Show("Passed!");
    // Declarations of the two externally implemented Speck routines.
    // NOTE(review): no DllImport attribute (and therefore no DLL name) is visible on either
    // declaration in this listing - confirm against the real source file.
    public static class UnsafeMethods
        // Argument order: input block (plainText), output block (cipherText), key.
        unsafe public extern static void SpeckEncrypt(byte* plainText, byte* cipherText, byte* Key);
        // Argument order: input block (cipherText), output block (plainText), key.
        unsafe public extern static void SpeckDecrypt(byte* cipherText, byte* plainText, byte* Key);





  1. 正如 @RossRidge 已经指出的那样,如果你只是简单地翻转整个数组,结果很可能是完全错误的——你应该交换正在处理的每个片段内部的字节(BSWAP),而不是颠倒这些片段的顺序。
  2. 很有可能你高估了自己编写高效机器码的能力:例如,你没有把互不相关的寄存器上的指令交错排列以获得更好的乱序执行,你的循环没有对齐,你让计数器递增到 N 而不是递减到零。当然,无论如何,这些代码仍然会比 .NET 快 10 倍,但我强烈建议你用 C 编写实现并进行基准测试——你会惊讶于编译器(MSVC、GCC)即使对直截了当编写的程序也能优化得多么好(相信我,我曾经在尝试完成相同任务时犯过同样的错误)。如果性能不是一个大问题,就不要折腾非托管代码,因为它只是一个外部的、不可移植的依赖,还会提高你的 .NET 应用程序所需的信任级别。
  3. 使用 .NET 函数处理字节时要小心,因为它们在字节序方面非常不一致:BitConverter 使用主机字节序,StreamReader 始终坚持小端序,String 则取决于给定的编码(在所有 UTF 编码中,只有 UTF-8 与字节序无关)。
