{ rtl/avr/int64p.inc }

{
    This file is part of the Free Pascal run time library.
    Copyright (c) 2008 by the Free Pascal development team

    This file contains some helper routines for int64 and qword

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}

{$ifndef CPUAVR_16_REGS}
{$define FPC_SYSTEM_HAS_SHR_QWORD}
// Simplistic version with checking if whole bytes can be shifted
// Doesn't change bitshift portion even if possible because of byteshift
// Shorter code but not shortest execution time version
function fpc_shr_qword(value: qword; shift: ALUUInt): qword; assembler; nostackframe;
  [public, alias: 'FPC_SHR_QWORD']; compilerproc;
label
  wholebytes, singlebits, done;
asm
// Logical right shift of a 64-bit value: whole bytes first, then single bits.
// In:   value   R25 (most significant byte) ... R18 (least significant byte)
//       shift   R16, only the low 6 bits are used
// Out:  result  R25 ... R18
// R16 is modified, so it is saved on entry and restored on exit.

  push R16

  andi R16, 63    // shift mod 64; Z is set if there is nothing to do
wholebytes:
  breq done       // Z comes from the andi above or from the subi below
  cpi R16, 8
  brlo singlebits // fewer than 8 positions left: go bit by bit
  mov R18, R19    // move every byte one position towards the low end
  mov R19, R20
  mov R20, R21
  mov R21, R22
  mov R22, R23
  mov R23, R24
  mov R24, R25
  clr R25         // zeroes enter at the top
  subi R16, 8     // 8 positions are done
  rjmp wholebytes // maybe another whole byte fits

singlebits:       // 1..7 positions left, one bit per pass
  lsr R25         // a zero enters at bit 63, bit 56 goes to carry
  ror R24         // carry ripples down through the remaining bytes
  ror R23
  ror R22
  ror R21
  ror R20
  ror R19
  ror R18

  dec R16
  brne singlebits // repeat until the count reaches 0

done:
  pop R16
end;
function fpc_shr_qword(value: qword; shift: ALUUInt): qword; external name 'FPC_SHR_QWORD';

{$define FPC_SYSTEM_HAS_SHL_QWORD}
function fpc_shl_qword(value: qword; shift: ALUUInt): qword; assembler; nostackframe;
  [public, alias: 'FPC_SHL_QWORD']; compilerproc;
label
  wholebytes, singlebits, done;
asm
// Left shift of a 64-bit value: whole bytes first, then single bits.
// In:   value   R25 (most significant byte) ... R18 (least significant byte)
//       shift   R16, only the low 6 bits are used
// Out:  result  R25 ... R18
// R16 is modified, so it is saved on entry and restored on exit.
  push R16

  andi R16, 63    // shift mod 64; Z is set if there is nothing to do
wholebytes:
  breq done       // Z comes from the andi above or from the subi below
  cpi R16, 8
  brlo singlebits // fewer than 8 positions left: go bit by bit
  mov R25, R24    // move every byte one position towards the high end
  mov R24, R23
  mov R23, R22
  mov R22, R21
  mov R21, R20
  mov R20, R19
  mov R19, R18
  clr R18         // zeroes enter at the low byte
  subi R16, 8     // 8 positions are done
  rjmp wholebytes // maybe another whole byte fits

singlebits:       // 1..7 positions left, one bit per pass
  lsl R18         // a zero enters at bit 0, bit 7 goes to carry
  rol R19         // carry ripples up through the remaining bytes
  rol R20
  rol R21
  rol R22
  rol R23
  rol R24
  rol R25

  dec R16
  brne singlebits // repeat until the count reaches 0

done:
  pop R16
end;

function fpc_shl_qword(value: qword; shift: ALUUInt): qword; external name 'FPC_SHL_QWORD';

{$define FPC_SYSTEM_HAS_SHL_INT64}
function fpc_shl_int64(value: int64; shift: ALUUInt): int64;
  [public, alias: 'FPC_SHL_INT64']; compilerproc; inline;
begin
  // Signed shl is delegated to the qword helper: the operand is reinterpreted
  // as unsigned and the helper's result is returned as is.
  fpc_shl_int64 := fpc_shl_qword(qword(value), shift);
end;

{$define FPC_SYSTEM_HAS_SHR_INT64}
// shr of signed int is same as shr of unsigned int (logical shift right)
function fpc_shr_int64(value: int64; shift: ALUUInt): int64; [public, alias: 'FPC_SHR_INT64']; compilerproc;
begin
  // Delegates to the qword helper, i.e. zeroes (not copies of the sign bit)
  // are shifted in at the top.
  fpc_shr_int64 := fpc_shr_qword(qword(value), shift);
end;

{$define FPC_SYSTEM_HAS_DIV_QWORD}
function fpc_div_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_DIV_QWORD']; compilerproc;
label
  nextbit, restore, bitdone;
asm
// Unsigned 64-bit restoring division; one quotient bit is produced per pass.
//
// Role                   Registers (high byte ... low byte)
// z  dividend/quotient   R17 ... R10
// n  divisor             R25 ... R18
//    partial remainder   R9  ... R2
//    pass counter        R26
//    constant 1          R27
// The quotient is returned in R25 ... R18.

  cp R25, R1      // is the divisor 0?  (R1 is assumed to hold 0)
  cpc R24, R1     // cpc keeps Z set only while every byte so far compared equal
  cpc R23, R1
  cpc R22, R1
  cpc R21, R1
  cpc R20, R1
  cpc R19, R1
  cpc R18, R1

  brne .LDivisorOk
{$ifdef CPUAVR_HAS_JMP_CALL}
  call fpc_divbyzero
{$else  CPUAVR_HAS_JMP_CALL}
  rcall fpc_divbyzero
{$endif CPUAVR_HAS_JMP_CALL}

.LDivisorOk:

  push R17        // R17 ... R2 are modified below; the matching pops are at the end
  push R16
  push R15
  push R14
  push R13
  push R12
  push R11
  push R10
  push R9
  push R8
  push R7
  push R6
  push R5
  push R4
  push R3
  push R2

  ldi R27, 1      // ori cannot address R10, so bit 0 is set from a register

  clr R9          // partial remainder := 0
  clr R8
  clr R7
  clr R6
  clr R5
  clr R4
  clr R3
  clr R2
  ldi R26, 64     // one pass per dividend bit

nextbit:
  lsl R10         // dividend <<= 1, its top bit ends up in carry;
  rol R11         // bit 0 becomes 0 and will receive the quotient bit
  rol R12
  rol R13
  rol R14
  rol R15
  rol R16
  rol R17

  rol R2          // remainder := remainder * 2 + that top bit
  rol R3
  rol R4
  rol R5
  rol R6
  rol R7
  rol R8
  rol R9

  sub R2, R18     // trial subtraction: remainder := remainder - divisor
  sbc R3, R19
  sbc R4, R20
  sbc R5, R21
  sbc R6, R22
  sbc R7, R23
  sbc R8, R24
  sbc R9, R25

  brlo restore    // borrow: the divisor did not fit
  or R10, R27     // it fitted: quotient bit := 1
  rjmp bitdone
restore:          // quotient bit stays 0; undo the trial subtraction

  add R2, R18
  adc R3, R19
  adc R4, R20
  adc R5, R21
  adc R6, R22
  adc R7, R23
  adc R8, R24
  adc R9, R25

bitdone:
  dec R26
  brne nextbit    // loop until all 64 bits are processed

  mov R25, R17    // quotient R17 ... R10 -> result R25 ... R18
  mov R24, R16
  mov R23, R15
  mov R22, R14
  mov R21, R13
  mov R20, R12
  mov R19, R11
  mov R18, R10

  pop R2          // restore in reverse push order
  pop R3
  pop R4
  pop R5
  pop R6
  pop R7
  pop R8
  pop R9
  pop R10
  pop R11
  pop R12
  pop R13
  pop R14
  pop R15
  pop R16
  pop R17
end;
function fpc_div_qword(n,z : qword): qword; external name 'FPC_DIV_QWORD';

{$define FPC_SYSTEM_HAS_MOD_QWORD}
function fpc_mod_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_MOD_QWORD']; compilerproc;
label
  nextbit, restore, bitdone;
asm
// Unsigned 64-bit remainder, computed by restoring division (one bit per pass).
//
// Role                   Registers (high byte ... low byte)
// z  dividend/quotient   R17 ... R10
// n  divisor             R25 ... R18
//    partial remainder   R9  ... R2
//    pass counter        R26
//    constant 1          R27
// The remainder is returned in R25 ... R18.

  cp R25, R1      // is the divisor 0?  (R1 is assumed to hold 0)
  cpc R24, R1     // cpc keeps Z set only while every byte so far compared equal
  cpc R23, R1
  cpc R22, R1
  cpc R21, R1
  cpc R20, R1
  cpc R19, R1
  cpc R18, R1

  brne .LDivisorOk
{$ifdef CPUAVR_HAS_JMP_CALL}
  call fpc_divbyzero
{$else  CPUAVR_HAS_JMP_CALL}
  rcall fpc_divbyzero
{$endif CPUAVR_HAS_JMP_CALL}

.LDivisorOk:

  push R17        // R17 ... R2 are modified below; the matching pops are at the end
  push R16
  push R15
  push R14
  push R13
  push R12
  push R11
  push R10
  push R9
  push R8
  push R7
  push R6
  push R5
  push R4
  push R3
  push R2

  ldi R27, 1      // ori cannot address R10, so bit 0 is set from a register

  clr R9          // partial remainder := 0
  clr R8
  clr R7
  clr R6
  clr R5
  clr R4
  clr R3
  clr R2
  ldi R26, 64     // one pass per dividend bit

nextbit:
  lsl R10         // dividend <<= 1, its top bit ends up in carry;
  rol R11         // bit 0 becomes 0 and will receive the quotient bit
  rol R12
  rol R13
  rol R14
  rol R15
  rol R16
  rol R17

  rol R2          // remainder := remainder * 2 + that top bit
  rol R3
  rol R4
  rol R5
  rol R6
  rol R7
  rol R8
  rol R9

  sub R2, R18     // trial subtraction: remainder := remainder - divisor
  sbc R3, R19
  sbc R4, R20
  sbc R5, R21
  sbc R6, R22
  sbc R7, R23
  sbc R8, R24
  sbc R9, R25

  brlo restore    // borrow: the divisor did not fit
  or R10, R27     // it fitted: quotient bit := 1
  rjmp bitdone
restore:          // quotient bit stays 0; undo the trial subtraction

  add R2, R18
  adc R3, R19
  adc R4, R20
  adc R5, R21
  adc R6, R22
  adc R7, R23
  adc R8, R24
  adc R9, R25

bitdone:
  dec R26
  brne nextbit    // loop until all 64 bits are processed

  mov R25, R9     // remainder R9 ... R2 -> result R25 ... R18
  mov R24, R8
  mov R23, R7
  mov R22, R6
  mov R21, R5
  mov R20, R4
  mov R19, R3
  mov R18, R2

  pop R2          // restore in reverse push order
  pop R3
  pop R4
  pop R5
  pop R6
  pop R7
  pop R8
  pop R9
  pop R10
  pop R11
  pop R12
  pop R13
  pop R14
  pop R15
  pop R16
  pop R17
end;
function fpc_mod_qword(n,z : qword): qword; external name 'FPC_MOD_QWORD';


{$define FPC_SYSTEM_HAS_DIV_INT64}
function fpc_div_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_DIV_INT64']; compilerproc;
label
  pos1, pos2, fin;
asm
// Signed 64-bit division built on fpc_div_qword: divide the absolute values,
// then negate the quotient when the signs of n and z differ.
//
// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
//         neg_result  R30  (bit 7 set: quotient has to be negated)
//         one         R31
// The quotient is returned in R25...R18.
//
// z is negated in place when it is negative, so the caller's R17...R10 are
// saved first and restored after the call, the same way fpc_div_qword and
// fpc_shr_qword preserve the R2...R17 registers they modify.
// NOTE: R30 must survive the call; fpc_div_qword itself only uses R1...R27.

  push R17        // keep the caller's copy of z
  push R16
  push R15
  push R14
  push R13
  push R12
  push R11
  push R10

  mov R30, R17    // store hi8(z)
  eor R30, R25    // hi8(z) XOR hi8(n): bit 7 is set if the signs differ

  // n := abs(n)
  ldi R31, 1      // constant 1 for the "+ 1" of the two's complement
  sub R25, R1     // subtract 0 (R1 is assumed to hold 0), just to get the sign flag
  brpl pos1
  com R25         // invert all bits...
  com R24
  com R23
  com R22
  com R21
  com R20
  com R19
  com R18
  add R18, R31    // ...and add 1
  adc R19, R1     // propagate the carry
  adc R20, R1
  adc R21, R1
  adc R22, R1
  adc R23, R1
  adc R24, R1
  adc R25, R1
  pos1:

  // z := abs(z)
  sub R17, R1
  brpl pos2
  com R17
  com R16
  com R15
  com R14
  com R13
  com R12
  com R11
  com R10
  add R10, R31
  adc R11, R1
  adc R12, R1
  adc R13, R1
  adc R14, R1
  adc R15, R1
  adc R16, R1
  adc R17, R1
  pos2:

{$ifdef CPUAVR_HAS_JMP_CALL}
  call fpc_div_qword
{$else  CPUAVR_HAS_JMP_CALL}
  rcall fpc_div_qword
{$endif CPUAVR_HAS_JMP_CALL}

  pop R10         // give the caller its original z back
  pop R11
  pop R12
  pop R13
  pop R14
  pop R15
  pop R16
  pop R17

  sbrs R30, 7     // bit 7 set (signs differ): skip the jump and negate
  rjmp fin        // signs equal: the unsigned quotient is the result
  com R25         // result from FPC_DIV_QWORD in R25 ... R18
  com R24
  com R23
  com R22
  com R21
  com R20
  com R19
  com R18

  ldi R31, 1      // reloaded after the call
  add R18, R31    // add 1
  adc R19, R1     // add carry bit
  adc R20, R1
  adc R21, R1
  adc R22, R1
  adc R23, R1
  adc R24, R1
  adc R25, R1
  fin:
end;

{$define FPC_SYSTEM_HAS_MOD_INT64}
function fpc_mod_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_MOD_INT64']; compilerproc;
label
  pos1, pos2, fin;
asm
// Signed 64-bit remainder built on fpc_mod_qword: take the remainder of the
// absolute values, then negate it when the dividend z was negative (the sign
// of n does not influence the sign of the result).
//
// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
//         neg_result  R30  (bit 7 set: remainder has to be negated)
//         one         R31
// The remainder is returned in R25...R18.
//
// z is negated in place when it is negative, so the caller's R17...R10 are
// saved first and restored after the call, the same way fpc_mod_qword and
// fpc_shr_qword preserve the R2...R17 registers they modify.
// NOTE: R30 must survive the call; fpc_mod_qword itself only uses R1...R27.

  push R17        // keep the caller's copy of z
  push R16
  push R15
  push R14
  push R13
  push R12
  push R11
  push R10

  mov R30, R17    // store hi8(z): its bit 7 is the sign of the result

  // n := abs(n)
  ldi R31, 1      // constant 1 for the "+ 1" of the two's complement
  sub R25, R1     // subtract 0 (R1 is assumed to hold 0), just to get the sign flag
  brpl pos1
  com R25         // invert all bits...
  com R24
  com R23
  com R22
  com R21
  com R20
  com R19
  com R18
  add R18, R31    // ...and add 1
  adc R19, R1     // propagate the carry
  adc R20, R1
  adc R21, R1
  adc R22, R1
  adc R23, R1
  adc R24, R1
  adc R25, R1
  pos1:

  // z := abs(z)
  sub R17, R1
  brpl pos2
  com R17
  com R16
  com R15
  com R14
  com R13
  com R12
  com R11
  com R10
  add R10, R31
  adc R11, R1
  adc R12, R1
  adc R13, R1
  adc R14, R1
  adc R15, R1
  adc R16, R1
  adc R17, R1
  pos2:

{$ifdef CPUAVR_HAS_JMP_CALL}
  call fpc_mod_qword
{$else  CPUAVR_HAS_JMP_CALL}
  rcall fpc_mod_qword
{$endif CPUAVR_HAS_JMP_CALL}

  pop R10         // give the caller its original z back
  pop R11
  pop R12
  pop R13
  pop R14
  pop R15
  pop R16
  pop R17

  sbrs R30, 7     // bit 7 set (z was negative): skip the jump and negate
  rjmp fin        // z was not negative: the unsigned remainder is the result
  com R25         // Convert to 2's complement
  com R24         // Complement all bits...
  com R23
  com R22
  com R21
  com R20
  com R19
  com R18
  ldi R31, 1      // reloaded after the call
  add R18, R31    // ...and add 1 to answer
  adc R19, R1
  adc R20, R1
  adc R21, R1
  adc R22, R1
  adc R23, R1
  adc R24, R1
  adc R25, R1
  fin:
end;
{$endif CPUAVR_16_REGS}