1.1 --- a/corepy-impl Mon Dec 08 21:59:43 2008 +0000
1.2 +++ b/corepy-impl Mon Dec 08 22:33:48 2008 +0000
1.3 @@ -11,13 +11,14 @@
1.4 num_rounds = 20000
1.5 state = ts.a + ts.b + ts.c
1.6
1.7 +ts.step(num_rounds)
1.8 pt = ParallelTrivium(num_rounds)
1.9 for i, b in enumerate(state):
1.10 pt.data[i*16] = chr(b)
1.11 -
1.12 -ts.step(num_rounds)
1.13 -pt.step(num_rounds)
1.14 +pt.run()
1.15
1.16 for j in range(num_rounds):
1.17 - assert ord(pt.data[16*j]) == ts.result[j]
1.18 + assert ord(pt.data[16*(j+288)]) == ts.result[j]
1.19 +for j, b in enumerate(state):
1.20 + assert ord(pt.data[16*j]) == state[j]
1.21
2.1 --- a/paralleltrivium.py Mon Dec 08 21:59:43 2008 +0000
2.2 +++ b/paralleltrivium.py Mon Dec 08 22:33:48 2008 +0000
2.3 @@ -41,10 +41,27 @@
2.4
2.5 code = env.InstructionStream()
2.6
2.7 - lbl_loop = code.get_label("LOOP")
2.8
2.9 code.add(x86.mov(r.eax, MemRef(r.ebp, 8)))
2.10 + code.add(x86.mov(r.ebx, r.eax))
2.11 + code.add(x86.add(r.ebx, total_len * 16))
2.12 + lbl_cloop = code.get_label("CLOOP")
2.13 + code.add(lbl_cloop)
2.14 + assert total_len % 4 == 0
2.15 + code.add(x86.movdqa(r.xmm0, MemRef(r.eax, 0, data_size=8*16)))
2.16 + code.add(x86.movdqa(r.xmm1, MemRef(r.eax, 16, data_size=8*16)))
2.17 + code.add(x86.movdqa(r.xmm2, MemRef(r.eax, 32, data_size=8*16)))
2.18 + code.add(x86.movdqa(r.xmm3, MemRef(r.eax, 48, data_size=8*16)))
2.19 + code.add(x86.movdqa(MemRef(r.eax, total_len*16, data_size=8*16), r.xmm0))
2.20 + code.add(x86.movdqa(MemRef(r.eax, (total_len +1)*16, data_size=8*16), r.xmm1))
2.21 + code.add(x86.movdqa(MemRef(r.eax, (total_len +2)*16, data_size=8*16), r.xmm2))
2.22 + code.add(x86.movdqa(MemRef(r.eax, (total_len +3)*16, data_size=8*16), r.xmm3))
2.23 + code.add(x86.add(r.eax, 16*4))
2.24 + code.add(x86.cmp(r.eax, r.ebx))
2.25 + code.add(x86.jnz(lbl_cloop))
2.26 +
2.27 code.add(x86.mov(r.ebx, MemRef(r.ebp, 12)))
2.28 + lbl_loop = code.get_label("LOOP")
2.29 code.add(lbl_loop)
2.30
2.31 def bitref(o):
2.32 @@ -77,17 +94,16 @@
2.33
2.34 class ParallelTrivium(object):
2.35 def __init__(self, max_rounds):
2.36 + assert max_rounds > 0
2.37 self.max_rounds = max_rounds
2.38 # FIXME: 288 hardcoded
2.39 - self.data = corepy.lib.extarray.extarray('b', 16*(max_rounds + 288))
2.40 + self.data = corepy.lib.extarray.extarray('b', 16*(max_rounds + 2*288))
2.41 + #print dir(self.data)
2.42 self.data.clear()
2.43 - self.current_round = 0
2.44
2.45 - def step(self, n):
2.46 - if self.current_round + n > self.max_rounds:
2.47 - raise Exception("n is too big")
2.48 + def run(self):
2.49 params = env.ExecParams()
2.50 - params.p1 = self.data.buffer_info()[0] + 16*(self.current_round)
2.51 - params.p2 = self.data.buffer_info()[0] + 16*(self.current_round + n)
2.52 + params.p1 = self.data.buffer_info()[0]
2.53 + params.p2 = self.data.buffer_info()[0] + 16*(288 + self.max_rounds)
2.54 env.Processor().execute(_sse_code, params=params)
2.55