Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Do not show detailed diffs for vendored third-party sources on GitHub
source/3rdparty/* linguist-generated=true
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,26 @@ repos:
hooks:
- id: isort
files: \.py$
exclude: ^source/3rdparty
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.0.291
hooks:
- id: ruff
args: ["--fix"]
exclude: ^source/3rdparty
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 23.9.1
hooks:
- id: black-jupyter
exclude: ^source/3rdparty
# numpydoc
- repo: https://github.com/Carreau/velin
rev: 0.0.12
hooks:
- id: velin
args: ["--write"]
exclude: ^source/3rdparty
# Python inside docs
- repo: https://github.com/asottile/blacken-docs
rev: 1.16.0
Expand Down Expand Up @@ -102,6 +106,7 @@ repos:
- --comment-style
- "#"
- --no-extra-eol
exclude: ^source/3rdparty
# HTML
- id: insert-license
files: \.(html|vue|xml)$
Expand Down
3 changes: 3 additions & 0 deletions source/3rdparty/implib/arch/aarch64/config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[Arch]
PointerSize = 8
SymbolReloc = R_AARCH64_ABS64
77 changes: 77 additions & 0 deletions source/3rdparty/implib/arch/aarch64/table.S.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Copyright 2018-2020 Yury Gribov
*
* The MIT License (MIT)
*
* Use of this source code is governed by MIT license that can be
* found in the LICENSE.txt file.
*/

#define lr x30
#define ip0 x16

.data

// Trampoline table. It starts out zero-filled; each per-symbol trampoline
// loads its own slot from it and treats a zero slot as not yet resolved.
// NOTE(review): on AArch64 GNU as interprets the .align operand as a power
// of two, so this requests 256-byte alignment - confirm that is intended.
.globl _${lib_suffix}_tramp_table
.hidden _${lib_suffix}_tramp_table
.align 8
_${lib_suffix}_tramp_table:
.zero $table_size

.text

// Resolver entry point; it is only declared here and defined elsewhere.
.globl _${lib_suffix}_tramp_resolve
.hidden _${lib_suffix}_tramp_resolve

// Slow-path helper called (with bl) from the per-symbol trampolines.
//
// On entry:
//   [sp, #0]  value stored by the trampoline (the symbol number, in ip0)
//   [sp, #8]  lr of the original caller, stored by the trampoline
//   lr        return address inside the trampoline
// Action:
//   Saves x0-x7, x8, lr and q0-q7, calls the resolver with the symbol
//   number in x0, then restores every saved register.
// On exit:
//   All saved registers hold their entry values and sp is back at its
//   entry value; control returns to the trampoline.
.globl _${lib_suffix}_save_regs_and_resolve
.hidden _${lib_suffix}_save_regs_and_resolve
.type _${lib_suffix}_save_regs_and_resolve, %function
_${lib_suffix}_save_regs_and_resolve:
  .cfi_startproc

  // Slow path which calls dlsym, taken only on first call.
  // Registers are saved according to "Procedure Call Standard for the Arm® 64-bit Architecture".
  // For DWARF directives, read https://www.imperialviolet.org/2017/01/18/cfi.html.

  // Stack is aligned at 16 bytes

  // Each helper moves sp and keeps the CFA offset and the register save
  // locations in the unwind info in step with the actual stack contents.
  // PUSH_PAIR / POP_PAIR handle two 8-byte registers in one 16-byte slot.
#define PUSH_PAIR(reg1, reg2) stp reg1, reg2, [sp, #-16]!; .cfi_adjust_cfa_offset 16; .cfi_rel_offset reg1, 0; .cfi_rel_offset reg2, 8
#define POP_PAIR(reg1, reg2) ldp reg1, reg2, [sp], #16; .cfi_adjust_cfa_offset -16; .cfi_restore reg2; .cfi_restore reg1

  // PUSH_WIDE_PAIR / POP_WIDE_PAIR handle two 16-byte registers in one 32-byte slot.
#define PUSH_WIDE_PAIR(reg1, reg2) stp reg1, reg2, [sp, #-32]!; .cfi_adjust_cfa_offset 32; .cfi_rel_offset reg1, 0; .cfi_rel_offset reg2, 16
#define POP_WIDE_PAIR(reg1, reg2) ldp reg1, reg2, [sp], #32; .cfi_adjust_cfa_offset -32; .cfi_restore reg2; .cfi_restore reg1

  // Save only arguments (and lr)
  PUSH_PAIR(x0, x1)
  PUSH_PAIR(x2, x3)
  PUSH_PAIR(x4, x5)
  PUSH_PAIR(x6, x7)
  PUSH_PAIR(x8, lr)

  // Five 16-byte pairs were pushed above, so the value that the trampoline
  // stored at the top of the stack now lives at sp + 16*5. It becomes the
  // first argument of the resolver. This load must stay before the vector
  // pushes below, otherwise the offset would be wrong.
  ldr x0, [sp, #80]  // 16*5

  PUSH_WIDE_PAIR(q0, q1)
  PUSH_WIDE_PAIR(q2, q3)
  PUSH_WIDE_PAIR(q4, q5)
  PUSH_WIDE_PAIR(q6, q7)

  // Stack is aligned at 16 bytes

  bl _${lib_suffix}_tramp_resolve

  // TODO: pop pc?

  // Restore in exact reverse order of the pushes above.
  POP_WIDE_PAIR(q6, q7)
  POP_WIDE_PAIR(q4, q5)
  POP_WIDE_PAIR(q2, q3)
  POP_WIDE_PAIR(q0, q1)

  POP_PAIR(x8, lr)
  POP_PAIR(x6, x7)
  POP_PAIR(x4, x5)
  POP_PAIR(x2, x3)
  POP_PAIR(x0, x1)

  // Return through lr (x30, the default operand of ret). Unlike an indirect
  // br, ret is recognised as a subroutine return by the return-address
  // predictor and does not need a BTI landing pad at the return site.
  ret

  .cfi_endproc
40 changes: 40 additions & 0 deletions source/3rdparty/implib/arch/aarch64/trampoline.S.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright 2018-2023 Yury Gribov
*
* The MIT License (MIT)
*
* Use of this source code is governed by MIT license that can be
* found in the LICENSE.txt file.
*/

// Trampoline for one imported symbol.
// It loads the slot of this symbol from the trampoline table and jumps to
// the address found there. While the slot is still zero it calls the
// save-registers-and-resolve helper and then retries the load.
// NOTE(review): ip0 is not defined in this template; it presumably comes
// from the x16 alias defined in the table template - confirm that both
// templates end up in the same assembly file.
.globl $sym
.p2align 4
.type $sym, %function
// The symbol stays hidden unless the build asks for exported shims.
#ifndef IMPLIB_EXPORT_SHIMS
.hidden $sym
#endif
$sym:
.cfi_startproc

1:
// Load address
// TODO: can we do this faster on newer ARMs?
// adrp yields the 4 KiB page of the slot, the lo12 relocation adds the
// offset of the slot within that page.
adrp ip0, _${lib_suffix}_tramp_table+$offset
ldr ip0, [ip0, #:lo12:_${lib_suffix}_tramp_table+$offset]

// A zero slot means there is no target address yet.
cbz ip0, 2f

// Fast path
// Tail-jump to the target; lr and all argument registers are untouched.
br ip0

2:
// Slow path
// Build the symbol number in ip0: mov sets the low 16 bits, movk adds
// bits 16 and up only when the number does not fit into 16 bits.
mov ip0, $number & 0xffff
#if $number > 0xffff
movk ip0, $number >> 16, lsl #16
#endif
// Push the symbol number together with lr as one 16-byte slot (keeps sp
// 16-byte aligned). The helper reads the number back from the stack, and
// lr must be saved because the bl below overwrites it.
stp ip0, lr, [sp, #-16]!; .cfi_adjust_cfa_offset 16; .cfi_rel_offset ip0, 0; .cfi_rel_offset lr, 8;
bl _${lib_suffix}_save_regs_and_resolve
// Drop the slot again and recover the lr of the original caller.
ldp ip0, lr, [sp], #16; .cfi_adjust_cfa_offset -16; .cfi_restore lr; .cfi_restore ip0
// Retry from the top; presumably the resolver has filled the table slot
// by now (the store is not visible in this template - confirm).
b 1b
.cfi_endproc
3 changes: 3 additions & 0 deletions source/3rdparty/implib/arch/arm/config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[Arch]
PointerSize = 4
SymbolReloc = R_ARM_ABS32
88 changes: 88 additions & 0 deletions source/3rdparty/implib/arch/arm/table.S.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright 2018-2022 Yury Gribov
*
* The MIT License (MIT)
*
* Use of this source code is governed by MIT license that can be
* found in the LICENSE.txt file.
*/

.data

// Trampoline table. It starts out zero-filled; each per-symbol trampoline
// loads its own slot from it and treats a zero slot as not yet resolved.
// NOTE(review): on ARM GNU as interprets the .align operand as a power of
// two, so this requests 16-byte alignment - confirm that is intended.
.globl _${lib_suffix}_tramp_table
.hidden _${lib_suffix}_tramp_table
.align 4
_${lib_suffix}_tramp_table:
.zero $table_size

.text

// Resolver entry point; it is only declared here and defined elsewhere.
.globl _${lib_suffix}_tramp_resolve
.hidden _${lib_suffix}_tramp_resolve

// Slow-path helper called (with bl) from the per-symbol trampolines.
//
// On entry:
//   [sp, #0]  lr of the original caller, pushed by the trampoline
//   [sp, #4]  symbol number, pushed by the trampoline
//   lr        return address inside the trampoline
// Action:
//   Saves r0-r3 and lr (plus d0-d15 for the hard-float ABI), calls the
//   resolver with the symbol number in r0, then restores every saved
//   register and returns to the trampoline with sp at its entry value.
.globl _${lib_suffix}_save_regs_and_resolve
.hidden _${lib_suffix}_save_regs_and_resolve
.type _${lib_suffix}_save_regs_and_resolve, %function
_${lib_suffix}_save_regs_and_resolve:
.cfi_startproc

// Push/pop one core register (4 bytes) and keep the CFA offset and the
// save location of the register in the unwind info up to date.
#define PUSH_REG(reg) push {reg}; .cfi_adjust_cfa_offset 4; .cfi_rel_offset reg, 0
#define POP_REG(reg) pop {reg} ; .cfi_adjust_cfa_offset -4; .cfi_restore reg

// Push/pop two D registers (16 bytes) with matching unwind info.
// Binutils 2.30 does not like q0 in .cfi_rel_offset
#define PUSH_DREG_PAIR(reg1, reg2) vpush {reg1, reg2}; .cfi_adjust_cfa_offset 16; .cfi_rel_offset reg1, 0; .cfi_rel_offset reg2, 8
#define POP_DREG_PAIR(reg1, reg2) vpop {reg1, reg2}; .cfi_adjust_cfa_offset -16; .cfi_restore reg1; .cfi_restore reg2

// Slow path which calls dlsym, taken only on first call.
// Registers are saved according to "Procedure Call Standard for the ARM Architecture".
// For DWARF directives, read https://www.imperialviolet.org/2017/01/18/cfi.html.

// Stack is aligned at 16 bytes at this point
// NOTE(review): the visible code only shows that the trampoline pushed two
// words (8 bytes) before calling here; the 16-byte claim above is not
// established by this file - confirm against the ABI in use.

// Save only arguments (and lr)
PUSH_REG(r0)
// With r0 on top of the two words pushed by the trampoline, the symbol
// number sits at sp + 8. It becomes the first argument of the resolver.
// This load must stay right after the first push or the offset changes.
ldr r0, [sp, #8]
PUSH_REG(r1)
PUSH_REG(r2)
PUSH_REG(r3)
PUSH_REG(lr)
// The second copy of lr is padding only: six words here plus the two words
// pushed by the trampoline keep sp a multiple of 8 for the call below.
PUSH_REG(lr) // Align to 8 bytes

// Arguments can be passed in VFP registers only when hard-float ABI is used
// for arm-gnueabihf target (http://android-doc.github.io/ndk/guides/abis.html#v7a).
// Use compiler macro to detect this case.
// NOTE(review): d8-d15 are callee-saved under the procedure call standard,
// so saving them here looks redundant but harmless - confirm.
#ifdef __ARM_PCS_VFP
PUSH_DREG_PAIR(d0, d1)
PUSH_DREG_PAIR(d2, d3)
PUSH_DREG_PAIR(d4, d5)
PUSH_DREG_PAIR(d6, d7)
PUSH_DREG_PAIR(d8, d9)
PUSH_DREG_PAIR(d10, d11)
PUSH_DREG_PAIR(d12, d13)
PUSH_DREG_PAIR(d14, d15)
// FIXME: NEON actually supports 32 D-registers but it's unclear how to detect this
#endif

// Call the resolver through the PLT with the symbol number in r0.
bl _${lib_suffix}_tramp_resolve(PLT)

// Restore in exact reverse order of the pushes above.
#ifdef __ARM_PCS_VFP
POP_DREG_PAIR(d14, d15)
POP_DREG_PAIR(d12, d13)
POP_DREG_PAIR(d10, d11)
POP_DREG_PAIR(d8, d9)
POP_DREG_PAIR(d6, d7)
POP_DREG_PAIR(d4, d5)
POP_DREG_PAIR(d2, d3)
POP_DREG_PAIR(d0, d1)
#endif

// The first pop removes the alignment padding, the second one restores the
// real lr (both slots hold the same value).
POP_REG(lr) // TODO: pop pc?
POP_REG(lr)
POP_REG(r3)
POP_REG(r2)
POP_REG(r1)
POP_REG(r0)

// Return to the trampoline.
bx lr

.cfi_endproc
49 changes: 49 additions & 0 deletions source/3rdparty/implib/arch/arm/trampoline.S.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright 2018-2023 Yury Gribov
*
* The MIT License (MIT)
*
* Use of this source code is governed by MIT license that can be
* found in the LICENSE.txt file.
*/

// Trampoline for one imported symbol.
// It loads the slot of this symbol from the trampoline table and jumps to
// the address found there. While the slot is still zero it calls the
// save-registers-and-resolve helper and then retries the load.
// NOTE(review): PUSH_REG and POP_REG are not defined in this template; they
// presumably come from the table template - confirm that both templates end
// up in the same assembly file.
.globl $sym
.p2align 4
.type $sym, %function
// The symbol stays hidden unless the build asks for exported shims.
#ifndef IMPLIB_EXPORT_SHIMS
.hidden $sym
#endif
$sym:
.cfi_startproc

1:
// Load address
// TODO: can we do this faster on newer ARMs?
// The literal at label 3 holds the distance from (label 2 + 8) to the table
// slot. Adding pc at label 2 turns it into the absolute slot address,
// assuming ARM (not Thumb) state where pc reads as the current instruction
// address plus 8.
ldr ip, 3f
2:
add ip, pc, ip
ldr ip, [ip]

// A zero slot means there is no target address yet.
cmp ip, #0

// Fast path
// Tail-jump to the target; lr and all argument registers are untouched.
bxne ip

// Slow path
// Push the symbol number, then lr; the helper reads the number back from
// the stack, and lr must be saved because the bl below overwrites it.
ldr ip, =$number
push {ip}
.cfi_adjust_cfa_offset 4
PUSH_REG(lr)
bl _${lib_suffix}_save_regs_and_resolve
POP_REG(lr)
// Discard the symbol number slot without loading it.
add sp, #4
.cfi_adjust_cfa_offset -4
// Retry from the top; presumably the resolver has filled the table slot
// by now (the store is not visible in this template - confirm).
b 1b

// Force constant pool for ldr above
.ltorg

.cfi_endproc

// PC-relative offset of the table slot of this symbol, consumed by the
// ldr / add pair at labels 1 and 2.
3:
.word _${lib_suffix}_tramp_table - (2b + 8) + $offset
Loading