1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
|
{
This file is part of the Free Pascal run time library.
Copyright (c) 2008 by the Free Pascal development team
This file contains some helper routines for int64 and qword
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{$ifndef CPUAVR_16_REGS}
{$define FPC_SYSTEM_HAS_SHR_QWORD}
// Logical right shift of a 64 bit value by (shift and 63) bits.
// Whole bytes are moved first for as long as at least 8 bits remain to be
// shifted; whatever is left (1..7 bits) is then shifted one bit per pass.
// The single-bit part is never shortened on account of the byte moves.
// This favours compact code over the fastest possible execution.
function fpc_shr_qword(value: qword; shift: ALUUInt): qword; assembler; nostackframe;
  [public, alias: 'FPC_SHR_QWORD']; compilerproc;
label
  wholebytes, singlebits, done;
asm
  // In : value in R25 (most significant) ... R18 (least significant)
  //      shift in R16
  // Out: result in R25 ... R18, R16 restored to its value on entry
  push R16
  andi R16, 63          // keep only the 6 relevant bits, as the generic routine does
  breq done             // shift count is 0: value is returned unchanged

wholebytes:
  cpi  R16, 8
  brlo singlebits       // fewer than 8 bits left: continue bit by bit
  mov  R18, R19         // move every byte one position towards the low end
  mov  R19, R20
  mov  R20, R21
  mov  R21, R22
  mov  R22, R23
  mov  R23, R24
  mov  R24, R25
  clr  R25              // zero is shifted in at the top
  subi R16, 8           // 8 bits done
  brne wholebytes       // more to do: see whether another byte fits
  rjmp done             // exactly a multiple of 8: finished

singlebits:
  lsr  R25              // top byte gets a 0, its bit 0 goes to carry
  ror  R24              // carry ripples down through the lower bytes
  ror  R23
  ror  R22
  ror  R21
  ror  R20
  ror  R19
  ror  R18
  dec  R16
  brne singlebits       // repeat until the remaining count reaches 0

done:
  pop  R16
end;
function fpc_shr_qword(value: qword; shift: ALUUInt): qword; external name 'FPC_SHR_QWORD';
{$define FPC_SYSTEM_HAS_SHL_QWORD}
// Left shift of a 64 bit value by (shift and 63) bits.
// Whole bytes are moved first for as long as at least 8 bits remain to be
// shifted; whatever is left (1..7 bits) is then shifted one bit per pass.
function fpc_shl_qword(value: qword; shift: ALUUInt): qword; assembler; nostackframe;
  [public, alias: 'FPC_SHL_QWORD']; compilerproc;
label
  wholebytes, singlebits, done;
asm
  // In : value in R25 (most significant) ... R18 (least significant)
  //      shift in R16
  // Out: result in R25 ... R18, R16 restored to its value on entry
  push R16
  andi R16, 63          // keep only the 6 relevant bits, as the generic routine does
  breq done             // shift count is 0: value is returned unchanged

wholebytes:
  cpi  R16, 8
  brlo singlebits       // fewer than 8 bits left: continue bit by bit
  mov  R25, R24         // move every byte one position towards the high end
  mov  R24, R23
  mov  R23, R22
  mov  R22, R21
  mov  R21, R20
  mov  R20, R19
  mov  R19, R18
  clr  R18              // zero is shifted in at the low byte
  subi R16, 8           // 8 bits done
  brne wholebytes       // more to do: see whether another byte fits
  rjmp done             // exactly a multiple of 8: finished

singlebits:
  lsl  R18              // low byte gets a 0, its bit 7 goes to carry
  rol  R19              // carry ripples up through the higher bytes
  rol  R20
  rol  R21
  rol  R22
  rol  R23
  rol  R24
  rol  R25
  dec  R16
  brne singlebits       // repeat until the remaining count reaches 0

done:
  pop  R16
end;
function fpc_shl_qword(value: qword; shift: ALUUInt): qword; external name 'FPC_SHL_QWORD';
{$define FPC_SYSTEM_HAS_SHL_INT64}
// Left shift of a signed 64 bit value: the bit pattern is handed to the
// unsigned helper fpc_shl_qword and its result is returned.
function fpc_shl_int64(value: int64; shift: ALUUInt): int64;
  [public, alias: 'FPC_SHL_INT64']; compilerproc; inline;
var
  bits: qword;
begin
  bits := qword(value);
  fpc_shl_int64 := fpc_shl_qword(bits, shift);
end;
{$define FPC_SYSTEM_HAS_SHR_INT64}
// shr on a signed integer is a logical shift right, exactly as for an
// unsigned one, so the bit pattern is handed to fpc_shr_qword.
function fpc_shr_int64(value: int64; shift: ALUUInt): int64; [public, alias: 'FPC_SHR_INT64']; compilerproc;
var
  bits: qword;
begin
  bits := qword(value);
  fpc_shr_int64 := fpc_shr_qword(bits, shift);
end;
{$define FPC_SYSTEM_HAS_DIV_QWORD}
// Unsigned 64 bit division: computes z div n (z = dividend, n = divisor).
// Method: bit-serial restoring division over 64 iterations. Each iteration
// shifts the dividend left by one bit into the remainder, subtracts the
// divisor from the remainder as a trial, and adds it back again if the
// subtraction borrowed. Quotient bits are collected in bit 0 of the
// dividend registers, so after 64 iterations those registers hold the
// quotient.
// A zero divisor is reported by calling fpc_divbyzero.
// R2..R17 are pushed on entry and popped on exit; R26 and R27 are used as
// scratch and are not restored.
function fpc_div_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_DIV_QWORD']; compilerproc;
label
start, div1, div2, div3, finish;
asm
// Symbol Name Register(s)
// z (A) dividend R17, R16, R15, R14, R13, R12, R11, R10
// n (B) divisor R25, R24, R23, R22, R21, R20, R19, R18
// r (P) remainder R9, R8, R7, R6, R5, R4, R3, R2
// i counter R26
// 1 R27
// The quotient is returned in R25..R18.

// Divisor = 0 test: every byte of n is compared against R1, which this file
// uses as a zero operand (presumably the ABI zero register - confirm).
// With cp followed by cpc the zero flag only survives if all eight
// comparisons were equal.
cp R25, R1
cpc R24, R1
cpc R23, R1
cpc R22, R1
cpc R21, R1
cpc R20, R1
cpc R19, R1
cpc R18, R1
brne .LNonZero
// n = 0: report the error. If fpc_divbyzero returns, execution falls through
// and the division below runs with a zero divisor.
{$ifdef CPUAVR_HAS_JMP_CALL}
call fpc_divbyzero
{$else CPUAVR_HAS_JMP_CALL}
rcall fpc_divbyzero
{$endif CPUAVR_HAS_JMP_CALL}
.LNonZero:
// Save the dividend registers and the registers used for the remainder;
// they are popped in reverse order at the end.
push R17
push R16
push R15
push R14
push R13
push R12
push R11
push R10
push R9
push R8
push R7
push R6
push R5
push R4
push R3
push R2
ldi R27, 1 // needed below for OR instruction (sets bit 0 of R10)
start: // Start of division... (label only, nothing jumps here)
clr R9 // clear remainder
clr R8
clr R7
clr R6
clr R5
clr R4
clr R3
clr R2
ldi R26, 64 // iterate over 64 bits
div1:
// A:P is treated as one 128 bit value and shifted left by one bit: the top
// bit of A moves into bit 0 of P, bit 0 of A becomes 0.
lsl R10 // shift left A_L
rol R11
rol R12
rol R13
rol R14
rol R15
rol R16
rol R17
rol R2 // shift left P with carry from A shift
rol R3
rol R4
rol R5
rol R6
rol R7
rol R8
rol R9
// Trial subtraction; carry is set afterwards if P was smaller than B.
sub R2, R18 // Subtract B from P, P <= P - B
sbc R3, R19
sbc R4, R20
sbc R5, R21
sbc R6, R22
sbc R7, R23
sbc R8, R24
sbc R9, R25
brlo div2 // borrow: B did not fit, undo the subtraction
or R10, R27 // Set A[0] = 1 (quotient bit for this iteration)
rjmp div3
div2: // negative branch, A[0] = 0 (default after shift), restore P
add R2, R18 // restore old value of P
adc R3, R19
adc R4, R20
adc R5, R21
adc R6, R22
adc R7, R23
adc R8, R24
adc R9, R25
div3:
dec R26 // one bit done
breq finish
rjmp div1
finish:
// A now holds the quotient.
mov R25, R17 // Move answer from R17..10 to R25..18
mov R24, R16
mov R23, R15
mov R22, R14
mov R21, R13
mov R20, R12
mov R19, R11
mov R18, R10
// Restore the saved registers (reverse order of the pushes above).
pop R2
pop R3
pop R4
pop R5
pop R6
pop R7
pop R8
pop R9
pop R10
pop R11
pop R12
pop R13
pop R14
pop R15
pop R16
pop R17
end;
function fpc_div_qword(n,z : qword): qword; external name 'FPC_DIV_QWORD';
{$define FPC_SYSTEM_HAS_MOD_QWORD}
// Unsigned 64 bit modulo: computes z mod n (z = dividend, n = divisor).
// Method: the same bit-serial restoring division as fpc_div_qword, run over
// 64 iterations. Each iteration shifts the dividend left by one bit into
// the remainder, subtracts the divisor from the remainder as a trial, and
// adds it back again if the subtraction borrowed. Here the remainder is
// returned instead of the quotient.
// A zero divisor is reported by calling fpc_divbyzero.
// R2..R17 are pushed on entry and popped on exit; R26 and R27 are used as
// scratch and are not restored.
function fpc_mod_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_MOD_QWORD']; compilerproc;
label
start, div1, div2, div3, finish;
asm
// Symbol Name Register(s)
// z (A) dividend R17, R16, R15, R14, R13, R12, R11, R10
// n (B) divisor R25, R24, R23, R22, R21, R20, R19, R18
// r (P) remainder R9, R8, R7, R6, R5, R4, R3, R2
// i counter R26
// 1 R27
// The remainder is returned in R25..R18.

// Divisor = 0 test: every byte of n is compared against R1, which this file
// uses as a zero operand (presumably the ABI zero register - confirm).
// With cp followed by cpc the zero flag only survives if all eight
// comparisons were equal.
cp R25, R1
cpc R24, R1
cpc R23, R1
cpc R22, R1
cpc R21, R1
cpc R20, R1
cpc R19, R1
cpc R18, R1
brne .LNonZero
// n = 0: report the error. If fpc_divbyzero returns, execution falls through
// and the loop below runs with a zero divisor.
{$ifdef CPUAVR_HAS_JMP_CALL}
call fpc_divbyzero
{$else CPUAVR_HAS_JMP_CALL}
rcall fpc_divbyzero
{$endif CPUAVR_HAS_JMP_CALL}
.LNonZero:
// Save the dividend registers and the registers used for the remainder;
// they are popped in reverse order at the end.
push R17
push R16
push R15
push R14
push R13
push R12
push R11
push R10
push R9
push R8
push R7
push R6
push R5
push R4
push R3
push R2
ldi R27, 1 // constant 1, used below to set bit 0 of R10
start: // Start of division... (label only, nothing jumps here)
clr R9 // clear remainder
clr R8
clr R7
clr R6
clr R5
clr R4
clr R3
clr R2
ldi R26, 64 // iterate over 64 bits
div1:
// A:P is treated as one 128 bit value and shifted left by one bit: the top
// bit of A moves into bit 0 of P, bit 0 of A becomes 0.
lsl R10 // shift left A_L
rol R11
rol R12
rol R13
rol R14
rol R15
rol R16
rol R17
rol R2 // shift left P with carry from A shift
rol R3
rol R4
rol R5
rol R6
rol R7
rol R8
rol R9
// Trial subtraction; carry is set afterwards if P was smaller than B.
sub R2, R18 // Subtract B from P, P <= P - B
sbc R3, R19
sbc R4, R20
sbc R5, R21
sbc R6, R22
sbc R7, R23
sbc R8, R24
sbc R9, R25
brlo div2 // borrow: B did not fit, undo the subtraction
or R10, R27 // Set A[0] = 1 (quotient bit; the quotient itself is not returned)
rjmp div3
div2: // negative branch, A[0] = 0 (default after shift), restore P
add R2, R18 // restore old value of P
adc R3, R19
adc R4, R20
adc R5, R21
adc R6, R22
adc R7, R23
adc R8, R24
adc R9, R25
div3:
dec R26 // one bit done
breq finish
rjmp div1
finish:
// P now holds the remainder.
mov R25, R9 // Move answer from R9..2 to R25..18
mov R24, R8
mov R23, R7
mov R22, R6
mov R21, R5
mov R20, R4
mov R19, R3
mov R18, R2
// Restore the saved registers (reverse order of the pushes above).
pop R2
pop R3
pop R4
pop R5
pop R6
pop R7
pop R8
pop R9
pop R10
pop R11
pop R12
pop R13
pop R14
pop R15
pop R16
pop R17
end;
function fpc_mod_qword(n,z : qword): qword; external name 'FPC_MOD_QWORD';
{$define FPC_SYSTEM_HAS_DIV_INT64}
// Signed 64 bit division: computes z div n (z = dividend, n = divisor).
// Both operands are converted to their absolute values, the unsigned routine
// fpc_div_qword does the work, and the quotient is negated afterwards if
// the signs of z and n differed.
// R17..R10 (z) are saved before they are modified and restored before
// returning, matching the way fpc_div_qword preserves them. R30 and R31 are
// used as scratch and are not restored. Needs 8 bytes of stack on top of
// what fpc_div_qword uses.
function fpc_div_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_DIV_INT64']; compilerproc;
label
  pos1, pos2, fin;
asm
  // n   divisor             R25, R24, R23, R22, R21, R20, R19, R18
  // z   dividend            R17, R16, R15, R14, R13, R12, R11, R10
  // neg_result (bit 7)      R30
  // one                     R31
  // The quotient is returned in R25..R18.

  mov R30, R17    // store hi8(z)
  eor R30, R25    // hi8(z) XOR hi8(n), answer must be negative if MSB set

  // z may be negated in place below; keep the caller's copy on the stack.
  push R17
  push R16
  push R15
  push R14
  push R13
  push R12
  push R11
  push R10

  // convert n to absolute
  ldi R31, 1      // 1 in R31 used for the "+1" of the two's complement
  sub R25, R1     // subtract 0, just to set the sign flag from hi8(n)
  brpl pos1       // n >= 0: nothing to do
  com R25         // n := -n  (invert all bits ...
  com R24
  com R23
  com R22
  com R21
  com R20
  com R19
  com R18
  add R18, R31    // ... and add 1)
  adc R19, R1     // add carry bit
  adc R20, R1
  adc R21, R1
  adc R22, R1
  adc R23, R1
  adc R24, R1
  adc R25, R1

pos1:
  // convert z to absolute
  sub R17, R1     // subtract 0, just to set the sign flag from hi8(z)
  brpl pos2       // z >= 0: nothing to do
  com R17         // z := -z
  com R16
  com R15
  com R14
  com R13
  com R12
  com R11
  com R10
  add R10, R31
  adc R11, R1
  adc R12, R1
  adc R13, R1
  adc R14, R1
  adc R15, R1
  adc R16, R1
  adc R17, R1

pos2:
{$ifdef CPUAVR_HAS_JMP_CALL}
  call fpc_div_qword
{$else CPUAVR_HAS_JMP_CALL}
  rcall fpc_div_qword
{$endif CPUAVR_HAS_JMP_CALL}

  // Give the caller its original z back (reverse order of the pushes).
  // pop does not touch the flags or R30.
  pop R10
  pop R11
  pop R12
  pop R13
  pop R14
  pop R15
  pop R16
  pop R17

  sbrs R30, 7     // skip the jump if bit 7 is set (result must be negative)
  rjmp fin        // signs were equal: unsigned quotient is the answer
  com R25         // negate result from FPC_DIV_QWORD in R25 ... R18
  com R24
  com R23
  com R22
  com R21
  com R20
  com R19
  com R18
  ldi R31, 1
  add R18, R31    // add 1
  adc R19, R1     // add carry bit
  adc R20, R1
  adc R21, R1
  adc R22, R1
  adc R23, R1
  adc R24, R1
  adc R25, R1
fin:
end;
{$define FPC_SYSTEM_HAS_MOD_INT64}
// Signed 64 bit modulo: computes z mod n (z = dividend, n = divisor).
// Both operands are converted to their absolute values, the unsigned routine
// fpc_mod_qword does the work, and the remainder is negated afterwards if
// the dividend z was negative (the result takes the sign of z).
// R17..R10 (z) are saved before they are modified and restored before
// returning, matching the way fpc_mod_qword preserves them. R30 and R31 are
// used as scratch and are not restored. Needs 8 bytes of stack on top of
// what fpc_mod_qword uses.
function fpc_mod_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_MOD_INT64']; compilerproc;
label
  pos1, pos2, fin;
asm
  // n   divisor             R25, R24, R23, R22, R21, R20, R19, R18
  // z   dividend            R17, R16, R15, R14, R13, R12, R11, R10
  // neg_result (bit 7)      R30
  // one                     R31
  // The remainder is returned in R25..R18.

  mov R30, R17    // store hi8(z): its MSB decides the sign of the result

  // z may be negated in place below; keep the caller's copy on the stack.
  push R17
  push R16
  push R15
  push R14
  push R13
  push R12
  push R11
  push R10

  // convert n to absolute
  ldi R31, 1      // 1 in R31 used for the "+1" of the two's complement
  sub R25, R1     // subtract 0, just to set the sign flag from hi8(n)
  brpl pos1       // n >= 0: nothing to do
  com R25         // n := -n  (invert all bits ...
  com R24
  com R23
  com R22
  com R21
  com R20
  com R19
  com R18
  add R18, R31    // ... and add 1)
  adc R19, R1     // add carry bit
  adc R20, R1
  adc R21, R1
  adc R22, R1
  adc R23, R1
  adc R24, R1
  adc R25, R1

pos1:
  // convert z to absolute
  sub R17, R1     // subtract 0, just to set the sign flag from hi8(z)
  brpl pos2       // z >= 0: nothing to do
  com R17         // z := -z
  com R16
  com R15
  com R14
  com R13
  com R12
  com R11
  com R10
  add R10, R31
  adc R11, R1
  adc R12, R1
  adc R13, R1
  adc R14, R1
  adc R15, R1
  adc R16, R1
  adc R17, R1

pos2:
{$ifdef CPUAVR_HAS_JMP_CALL}
  call fpc_mod_qword
{$else CPUAVR_HAS_JMP_CALL}
  rcall fpc_mod_qword
{$endif CPUAVR_HAS_JMP_CALL}

  // Give the caller its original z back (reverse order of the pushes).
  // pop does not touch the flags or R30.
  pop R10
  pop R11
  pop R12
  pop R13
  pop R14
  pop R15
  pop R16
  pop R17

  sbrs R30, 7     // skip the jump if the sign bit of z was set
  rjmp fin        // z was not negative: unsigned remainder is the answer
  com R25         // Convert to 2's complement
  com R24         // Complement all bits...
  com R23
  com R22
  com R21
  com R20
  com R19
  com R18
  ldi R31, 1
  add R18, R31    // ...and add 1 to answer
  adc R19, R1
  adc R20, R1
  adc R21, R1
  adc R22, R1
  adc R23, R1
  adc R24, R1
  adc R25, R1
fin:
end;
{$endif CPUAVR_16_REGS}
|