Redesign for multi-shot use
author Paul Crowley <paul@ciphergoth.org>
Mon Dec 08 22:33:48 2008 +0000 (2008-12-08)
changeset 15 e7443c5ce784
parent 14 b0c537274d5b
child 16 93c808119f42
Redesign for multi-shot use
corepy-impl
paralleltrivium.py
     1.1 --- a/corepy-impl	Mon Dec 08 21:59:43 2008 +0000
     1.2 +++ b/corepy-impl	Mon Dec 08 22:33:48 2008 +0000
     1.3 @@ -11,13 +11,14 @@
     1.4  num_rounds = 20000
     1.5  state = ts.a + ts.b + ts.c
     1.6  
     1.7 +ts.step(num_rounds)
     1.8  pt = ParallelTrivium(num_rounds)
     1.9  for i, b in enumerate(state):
    1.10      pt.data[i*16] = chr(b)
    1.11 -
    1.12 -ts.step(num_rounds)
    1.13 -pt.step(num_rounds)
    1.14 +pt.run()
    1.15      
    1.16  for j in range(num_rounds):
    1.17 -    assert ord(pt.data[16*j]) == ts.result[j]
    1.18 +    assert ord(pt.data[16*(j+288)]) == ts.result[j]
    1.19 +for j, b in enumerate(state):
    1.20 +    assert ord(pt.data[16*j]) == state[j]
    1.21  
     2.1 --- a/paralleltrivium.py	Mon Dec 08 21:59:43 2008 +0000
     2.2 +++ b/paralleltrivium.py	Mon Dec 08 22:33:48 2008 +0000
     2.3 @@ -41,10 +41,27 @@
     2.4  
     2.5      code = env.InstructionStream()
     2.6       
     2.7 -    lbl_loop = code.get_label("LOOP")
     2.8  
     2.9      code.add(x86.mov(r.eax, MemRef(r.ebp, 8))) 
    2.10 +    code.add(x86.mov(r.ebx, r.eax))
    2.11 +    code.add(x86.add(r.ebx, total_len * 16))
    2.12 +    lbl_cloop = code.get_label("CLOOP")
    2.13 +    code.add(lbl_cloop)
    2.14 +    assert total_len % 4 == 0
    2.15 +    code.add(x86.movdqa(r.xmm0, MemRef(r.eax,  0, data_size=8*16)))
    2.16 +    code.add(x86.movdqa(r.xmm1, MemRef(r.eax, 16, data_size=8*16)))
    2.17 +    code.add(x86.movdqa(r.xmm2, MemRef(r.eax, 32, data_size=8*16)))
    2.18 +    code.add(x86.movdqa(r.xmm3, MemRef(r.eax, 48, data_size=8*16)))
    2.19 +    code.add(x86.movdqa(MemRef(r.eax, total_len*16, data_size=8*16), r.xmm0))
    2.20 +    code.add(x86.movdqa(MemRef(r.eax, (total_len +1)*16, data_size=8*16), r.xmm1))
    2.21 +    code.add(x86.movdqa(MemRef(r.eax, (total_len +2)*16, data_size=8*16), r.xmm2))
    2.22 +    code.add(x86.movdqa(MemRef(r.eax, (total_len +3)*16, data_size=8*16), r.xmm3))
    2.23 +    code.add(x86.add(r.eax, 16*4))
    2.24 +    code.add(x86.cmp(r.eax, r.ebx))
    2.25 +    code.add(x86.jnz(lbl_cloop))
    2.26 +
    2.27      code.add(x86.mov(r.ebx, MemRef(r.ebp, 12)))
    2.28 +    lbl_loop = code.get_label("LOOP")
    2.29      code.add(lbl_loop)
    2.30  
    2.31      def bitref(o):
    2.32 @@ -77,17 +94,16 @@
    2.33  
    2.34  class ParallelTrivium(object):
    2.35      def __init__(self, max_rounds):
    2.36 +        assert max_rounds > 0
    2.37          self.max_rounds = max_rounds
    2.38          # FIXME: 288 hardcoded
    2.39 -        self.data = corepy.lib.extarray.extarray('b', 16*(max_rounds + 288))
    2.40 +        self.data = corepy.lib.extarray.extarray('b', 16*(max_rounds + 2*288))
    2.41 +        #print dir(self.data)
    2.42          self.data.clear()
    2.43 -        self.current_round = 0
    2.44      
    2.45 -    def step(self, n):
    2.46 -        if self.current_round + n > self.max_rounds:
    2.47 -            raise Exception("n is too big")
    2.48 +    def run(self):
    2.49          params = env.ExecParams()
    2.50 -        params.p1 = self.data.buffer_info()[0] + 16*(self.current_round)
    2.51 -        params.p2 = self.data.buffer_info()[0] + 16*(self.current_round + n)
    2.52 +        params.p1 = self.data.buffer_info()[0]
    2.53 +        params.p2 = self.data.buffer_info()[0] + 16*(288 + self.max_rounds)
    2.54          env.Processor().execute(_sse_code, params=params)
    2.55