Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
elseif((CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64"))
# segmentation fault in libcontext
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-gcse")
elseif((CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64"))
Comment thread
Dayuxiaoshui marked this conversation as resolved.
# RISC-V specific optimizations
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=rv64gc")
endif()
if(NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-aligned-new")
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ ifeq ($(shell test $(GCC_VERSION) -ge 40400; echo $$?),0)
CXXFLAGS+=-msse4 -msse4.2
endif
endif
# RISC-V specific optimizations
ifeq ($(shell uname -m),riscv64)
CXXFLAGS+=-march=rv64gc
endif
#not solved yet
ifeq ($(CC),gcc)
ifeq ($(shell test $(GCC_VERSION) -ge 70000; echo $$?),0)
Expand Down
88 changes: 88 additions & 0 deletions src/bthread/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -900,3 +900,91 @@ __asm (
);

#endif

#if defined(BTHREAD_CONTEXT_PLATFORM_linux_riscv64) && defined(BTHREAD_CONTEXT_COMPILER_gcc)
__asm (
".text\n"
".align 3\n"
".global bthread_jump_fcontext\n"
".type bthread_jump_fcontext, %function\n"
"bthread_jump_fcontext:\n"
" addi sp, sp, -160\n"
" # save callee-saved registers\n"
" sd s0, 64(sp)\n"
" sd s1, 72(sp)\n"
" sd s2, 80(sp)\n"
" sd s3, 88(sp)\n"
" sd s4, 96(sp)\n"
" sd s5, 104(sp)\n"
" sd s6, 112(sp)\n"
" sd s7, 120(sp)\n"
" sd s8, 128(sp)\n"
" sd s9, 136(sp)\n"
" sd s10, 144(sp)\n"
" sd s11, 152(sp)\n"
" sd ra, 0(sp)\n"
" sd fp, 8(sp)\n"
" # save floating point registers\n"
" fsd fs0, 16(sp)\n"
" fsd fs1, 24(sp)\n"
" fsd fs2, 32(sp)\n"
" fsd fs3, 40(sp)\n"
" fsd fs4, 48(sp)\n"
" fsd fs5, 56(sp)\n"
" # store current stack pointer\n"
" sd sp, 0(a0)\n"
" # load new stack pointer\n"
" mv sp, a1\n"
" # restore floating point registers\n"
" fld fs0, 16(sp)\n"
" fld fs1, 24(sp)\n"
" fld fs2, 32(sp)\n"
" fld fs3, 40(sp)\n"
" fld fs4, 48(sp)\n"
" fld fs5, 56(sp)\n"
" # restore callee-saved registers\n"
" ld s0, 64(sp)\n"
" ld s1, 72(sp)\n"
" ld s2, 80(sp)\n"
" ld s3, 88(sp)\n"
" ld s4, 96(sp)\n"
" ld s5, 104(sp)\n"
" ld s6, 112(sp)\n"
" ld s7, 120(sp)\n"
" ld s8, 128(sp)\n"
" ld s9, 136(sp)\n"
" ld s10, 144(sp)\n"
" ld s11, 152(sp)\n"
" ld ra, 0(sp)\n"
" ld fp, 8(sp)\n"
" # restore stack pointer\n"
" addi sp, sp, 160\n"
" # return value in a0\n"
" mv a0, a2\n"
" # jump to new context\n"
" ret\n"
);

__asm (
".text\n"
".align 3\n"
".global bthread_make_fcontext\n"
".type bthread_make_fcontext, %function\n"
"bthread_make_fcontext:\n"
" # align stack to 16-byte boundary\n"
" andi a0, a0, -16\n"
" addi a0, a0, -160\n"
" # store function pointer at the top of stack\n"
" sd a2, 0(a0)\n"
" # store finish function address\n"
" la t0, finish\n"
" sd t0, 8(a0)\n"
" # return pointer to context data\n"
" ret\n"
"finish:\n"
" # exit with code 0\n"
" li a0, 0\n"
" # call exit\n"
" call _exit\n"
);
#endif
3 changes: 3 additions & 0 deletions src/bthread/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
#elif __loongarch64
#define BTHREAD_CONTEXT_PLATFORM_linux_loongarch64
#define BTHREAD_CONTEXT_CALL_CONVENTION
#elif __riscv
#define BTHREAD_CONTEXT_PLATFORM_linux_riscv64
#define BTHREAD_CONTEXT_CALL_CONVENTION
#endif

#elif defined(__MINGW32__) || defined (__MINGW64__)
Expand Down
2 changes: 2 additions & 0 deletions src/bthread/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
# ifndef cpu_relax
#if defined(ARCH_CPU_ARM_FAMILY)
# define cpu_relax() asm volatile("yield\n": : :"memory")
#elif defined(ARCH_CPU_RISCV_FAMILY)
# define cpu_relax() asm volatile("fence.i\n": : :"memory")
#elif defined(ARCH_CPU_LOONGARCH64_FAMILY)
# define cpu_relax() asm volatile("nop\n": : :"memory");
#else
Expand Down
6 changes: 4 additions & 2 deletions src/bthread/task_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ AtomicInteger128::Value AtomicInteger128::load() const {
#endif // __x86_64__
return {value[0], value[1]};
#else // __x86_64__ || __ARM_NEON
BAIDU_SCOPED_LOCK(_mutex);
// RISC-V and other architectures use mutex fallback
BAIDU_SCOPED_LOCK(const_cast<FastPthreadMutex&>(_mutex));
return _value;
#endif // __x86_64__ || __ARM_NEON
}
Expand All @@ -113,7 +114,8 @@ void AtomicInteger128::store(Value value) {
int64x2_t v = vld1q_s64(reinterpret_cast<int64_t*>(&value));
vst1q_s64(reinterpret_cast<int64_t*>(&_value), v);
#else
BAIDU_SCOPED_LOCK(_mutex);
// RISC-V and other architectures use mutex fallback
BAIDU_SCOPED_LOCK(const_cast<FastPthreadMutex&>(_mutex));
_value = value;
#endif // __x86_64__ || __ARM_NEON
}
Expand Down
2 changes: 1 addition & 1 deletion src/bthread/task_group.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class AtomicInteger128 {

private:
Value _value{};
// Used to protect `_cpu_time_stat' when __x86_64__ and __ARM_NEON is not defined.
// Used to protect `_cpu_time_stat' when __x86_64__, __ARM_NEON, and __riscv is not defined.
FastPthreadMutex _mutex;
};

Expand Down
2 changes: 2 additions & 0 deletions src/butil/atomicops.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ Atomic64 Release_Load(volatile const Atomic64* ptr);
#include "butil/atomicops_internals_mips_gcc.h"
#elif defined(COMPILER_GCC) && defined(ARCH_CPU_LOONGARCH64_FAMILY)
#include "butil/atomicops_internals_loongarch64_gcc.h"
#elif defined(COMPILER_GCC) && defined(ARCH_CPU_RISCV_FAMILY)
#include "butil/atomicops_internals_riscv_gcc.h"
#else
#error "Atomic operations are not supported on your platform"
#endif
Expand Down
192 changes: 192 additions & 0 deletions src/butil/atomicops_internals_riscv_gcc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
// Copyright 2024 The Apache Software Foundation. All rights reserved.
// Use of this source code is governed by the Apache License, Version 2.0
// that can be found in the LICENSE file.

// This file is an internal atomic implementation, use butil/atomicops.h instead.
// RISC-V architecture specific atomic operations implementation using GCC intrinsics.

#ifndef BUTIL_ATOMICOPS_INTERNALS_RISCV_GCC_H_
#define BUTIL_ATOMICOPS_INTERNALS_RISCV_GCC_H_

namespace butil {
namespace subtle {

inline void MemoryBarrier() {
__asm__ __volatile__ ("fence" ::: "memory"); // NOLINT
}

// RISC-V atomic operations using GCC built-in functions
// These are implemented using the standard GCC atomic built-ins which
// are supported on RISC-V since GCC 7.1+

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
Atomic32 old_value,
Atomic32 new_value) {
Atomic32 prev_value;
do {
if (__sync_bool_compare_and_swap(ptr, old_value, new_value))
return old_value;
prev_value = *ptr;
} while (prev_value == old_value);
return prev_value;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
Atomic32 new_value) {
Atomic32 old_value;
do {
old_value = *ptr;
} while (!__sync_bool_compare_and_swap(ptr, old_value, new_value));
return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
Atomic32 increment) {
return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
Atomic32 increment) {
for (;;) {
// Atomic exchange the old value with an incremented one.
Atomic32 old_value = *ptr;
Atomic32 new_value = old_value + increment;
if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) {
// The exchange took place as expected.
return new_value;
}
// Otherwise, *ptr changed mid-loop and we need to retry.
}
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
Atomic32 old_value,
Atomic32 new_value) {
// Since NoBarrier_CompareAndSwap uses __sync_bool_compare_and_swap, which
// is a full memory barrier, none is needed here or below in Release.
return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
Atomic32 old_value,
Atomic32 new_value) {
return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
*ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
*ptr = value;
MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
MemoryBarrier();
*ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
Atomic32 value = *ptr;
MemoryBarrier();
return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
MemoryBarrier();
return *ptr;
}

// 64-bit versions of the operations.
// See the 32-bit versions for comments.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
Atomic64 old_value,
Atomic64 new_value) {
Atomic64 prev_value;
do {
if (__sync_bool_compare_and_swap(ptr, old_value, new_value))
return old_value;
prev_value = *ptr;
} while (prev_value == old_value);
return prev_value;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
Atomic64 new_value) {
Atomic64 old_value;
do {
old_value = *ptr;
} while (!__sync_bool_compare_and_swap(ptr, old_value, new_value));
return old_value;
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
for (;;) {
// Atomic exchange the old value with an incremented one.
Atomic64 old_value = *ptr;
Atomic64 new_value = old_value + increment;
if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) {
// The exchange took place as expected.
return new_value;
}
// Otherwise, *ptr changed mid-loop and we need to retry.
}
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
Atomic64 old_value,
Atomic64 new_value) {
return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
Atomic64 old_value,
Atomic64 new_value) {
return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
*ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
*ptr = value;
MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
MemoryBarrier();
*ptr = value;
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
Atomic64 value = *ptr;
MemoryBarrier();
return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
MemoryBarrier();
return *ptr;
}

} // namespace butil::subtle
} // namespace butil

#endif // BUTIL_ATOMICOPS_INTERNALS_RISCV_GCC_H_
10 changes: 10 additions & 0 deletions src/butil/build_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,16 @@
#define ARCH_CPU_LOONGARCH64 1
#define ARCH_CPU_64_BITS 1
#define ARCH_CPU_LITTLE_ENDIAN 1
#elif defined(__riscv)
#define ARCH_CPU_RISCV_FAMILY 1
#if defined(__riscv_xlen) && (__riscv_xlen == 64)
#define ARCH_CPU_RISCV64 1
#define ARCH_CPU_64_BITS 1
#else
#define ARCH_CPU_RISCV32 1
#define ARCH_CPU_32_BITS 1
#endif
#define ARCH_CPU_LITTLE_ENDIAN 1
#else
#error Please add support for your architecture in butil/build_config.h
#endif
Expand Down
7 changes: 7 additions & 0 deletions src/butil/time.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,13 @@ inline uint64_t clock_cycles() {
: "=r" (stable_counter), "=r" (counter_id)
);
return stable_counter;
#elif defined(__riscv)
uint64_t cycles;
__asm__ __volatile__ (
"rdcycle %0"
: "=r" (cycles)
);
return cycles;
#else
#error "unsupported arch"
#endif
Expand Down
Loading