diff --git a/CMakeLists.txt b/CMakeLists.txt index 45fcf61126..24b0b1d737 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -170,6 +170,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") elseif((CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")) # segmentation fault in libcontext set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-gcse") + elseif((CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")) + # RISC-V specific optimizations + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=rv64gc") endif() if(NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0)) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-aligned-new") diff --git a/Makefile b/Makefile index 16a10ae2ea..abe029e360 100644 --- a/Makefile +++ b/Makefile @@ -44,6 +44,10 @@ ifeq ($(shell test $(GCC_VERSION) -ge 40400; echo $$?),0) CXXFLAGS+=-msse4 -msse4.2 endif endif +# RISC-V specific optimizations +ifeq ($(shell uname -m),riscv64) + CXXFLAGS+=-march=rv64gc +endif #not solved yet ifeq ($(CC),gcc) ifeq ($(shell test $(GCC_VERSION) -ge 70000; echo $$?),0) diff --git a/src/bthread/context.cpp b/src/bthread/context.cpp index bafa927d78..b7be731eae 100644 --- a/src/bthread/context.cpp +++ b/src/bthread/context.cpp @@ -900,3 +900,91 @@ __asm ( ); #endif + +#if defined(BTHREAD_CONTEXT_PLATFORM_linux_riscv64) && defined(BTHREAD_CONTEXT_COMPILER_gcc) +__asm ( +".text\n" +".align 3\n" +".global bthread_jump_fcontext\n" +".type bthread_jump_fcontext, %function\n" +"bthread_jump_fcontext:\n" +" addi sp, sp, -160\n" +" # save callee-saved registers\n" +" sd s0, 64(sp)\n" +" sd s1, 72(sp)\n" +" sd s2, 80(sp)\n" +" sd s3, 88(sp)\n" +" sd s4, 96(sp)\n" +" sd s5, 104(sp)\n" +" sd s6, 112(sp)\n" +" sd s7, 120(sp)\n" +" sd s8, 128(sp)\n" +" sd s9, 136(sp)\n" +" sd s10, 144(sp)\n" +" sd s11, 152(sp)\n" +" sd ra, 0(sp)\n" +" sd fp, 8(sp)\n" +" # save floating point registers\n" +" fsd fs0, 16(sp)\n" +" fsd fs1, 24(sp)\n" +" fsd fs2, 32(sp)\n" +" fsd fs3, 40(sp)\n" +" fsd fs4, 48(sp)\n" +" fsd fs5, 56(sp)\n" +" # store current stack pointer\n" +" sd sp, 0(a0)\n" +" # load new stack pointer\n" +" mv sp, a1\n" +" # restore floating point registers\n" +" fld fs0, 16(sp)\n" +" fld fs1, 24(sp)\n" +" fld fs2, 32(sp)\n" +" fld fs3, 40(sp)\n" +" fld fs4, 48(sp)\n" +" fld fs5, 56(sp)\n" +" # restore callee-saved registers\n" +" ld s0, 64(sp)\n" +" ld s1, 72(sp)\n" +" ld s2, 80(sp)\n" +" ld s3, 88(sp)\n" +" ld s4, 96(sp)\n" +" ld s5, 104(sp)\n" +" ld s6, 112(sp)\n" +" ld s7, 120(sp)\n" +" ld s8, 128(sp)\n" +" ld s9, 136(sp)\n" +" ld s10, 144(sp)\n" +" ld s11, 152(sp)\n" +" ld ra, 0(sp)\n" +" ld fp, 8(sp)\n" +" # restore stack pointer\n" +" addi sp, sp, 160\n" +" # return value in a0\n" +" mv a0, a2\n" +" # jump to new context\n" +" ret\n" +); + +__asm ( +".text\n" +".align 3\n" +".global bthread_make_fcontext\n" +".type bthread_make_fcontext, %function\n" +"bthread_make_fcontext:\n" +" # align stack to 16-byte boundary\n" +" andi a0, a0, -16\n" +" addi a0, a0, -160\n" +" # store function pointer at the top of stack\n" +" sd a2, 0(a0)\n" +" # store finish function address\n" +" la t0, finish\n" +" sd t0, 8(a0)\n" +" # return pointer to context data\n" +" ret\n" +"finish:\n" +" # exit with code 0\n" +" li a0, 0\n" +" # call exit\n" +" call _exit\n" +); +#endif diff --git a/src/bthread/context.h b/src/bthread/context.h index 8de85af626..149c7672a3 100644 --- a/src/bthread/context.h +++ b/src/bthread/context.h @@ -42,6 +42,9 @@ #elif __loongarch64 #define BTHREAD_CONTEXT_PLATFORM_linux_loongarch64 #define BTHREAD_CONTEXT_CALL_CONVENTION + #elif __riscv + #define BTHREAD_CONTEXT_PLATFORM_linux_riscv64 + #define BTHREAD_CONTEXT_CALL_CONVENTION #endif #elif defined(__MINGW32__) || defined (__MINGW64__) diff --git a/src/bthread/processor.h b/src/bthread/processor.h index f8939234e8..246c8b936f 100644 --- a/src/bthread/processor.h +++ b/src/bthread/processor.h @@ -28,6 +28,8 @@ # ifndef cpu_relax #if defined(ARCH_CPU_ARM_FAMILY) # define cpu_relax() asm volatile("yield\n": : :"memory") +#elif defined(ARCH_CPU_RISCV_FAMILY) +# define cpu_relax() asm volatile("fence.i\n": : :"memory") #elif defined(ARCH_CPU_LOONGARCH64_FAMILY) # define cpu_relax() asm volatile("nop\n": : :"memory"); #else diff --git a/src/bthread/task_group.cpp b/src/bthread/task_group.cpp index 67f029a02b..4e35d738f1 100644 --- a/src/bthread/task_group.cpp +++ b/src/bthread/task_group.cpp @@ -100,7 +100,8 @@ AtomicInteger128::Value AtomicInteger128::load() const { #endif // __x86_64__ return {value[0], value[1]}; #else // __x86_64__ || __ARM_NEON - BAIDU_SCOPED_LOCK(_mutex); + // RISC-V and other architectures use mutex fallback + BAIDU_SCOPED_LOCK(const_cast(_mutex)); return _value; #endif // __x86_64__ || __ARM_NEON } @@ -113,7 +114,8 @@ void AtomicInteger128::store(Value value) { int64x2_t v = vld1q_s64(reinterpret_cast(&value)); vst1q_s64(reinterpret_cast(&_value), v); #else - BAIDU_SCOPED_LOCK(_mutex); + // RISC-V and other architectures use mutex fallback + BAIDU_SCOPED_LOCK(const_cast(_mutex)); _value = value; #endif // __x86_64__ || __ARM_NEON } diff --git a/src/bthread/task_group.h b/src/bthread/task_group.h index 958f81d7d6..f3672b86d9 100644 --- a/src/bthread/task_group.h +++ b/src/bthread/task_group.h @@ -73,7 +73,7 @@ class AtomicInteger128 { private: Value _value{}; - // Used to protect `_cpu_time_stat' when __x86_64__ and __ARM_NEON is not defined. + // Used to protect `_cpu_time_stat' when __x86_64__, __ARM_NEON, and __riscv is not defined. FastPthreadMutex _mutex; }; diff --git a/src/butil/atomicops.h b/src/butil/atomicops.h index cda1529f40..7ee383770f 100644 --- a/src/butil/atomicops.h +++ b/src/butil/atomicops.h @@ -157,6 +157,8 @@ Atomic64 Release_Load(volatile const Atomic64* ptr); #include "butil/atomicops_internals_mips_gcc.h" #elif defined(COMPILER_GCC) && defined(ARCH_CPU_LOONGARCH64_FAMILY) #include "butil/atomicops_internals_loongarch64_gcc.h" +#elif defined(COMPILER_GCC) && defined(ARCH_CPU_RISCV_FAMILY) +#include "butil/atomicops_internals_riscv_gcc.h" #else #error "Atomic operations are not supported on your platform" #endif diff --git a/src/butil/atomicops_internals_riscv_gcc.h b/src/butil/atomicops_internals_riscv_gcc.h new file mode 100644 index 0000000000..e7bd78bbed --- /dev/null +++ b/src/butil/atomicops_internals_riscv_gcc.h @@ -0,0 +1,192 @@ +// Copyright 2024 The Apache Software Foundation. All rights reserved. +// Use of this source code is governed by the Apache License, Version 2.0 +// that can be found in the LICENSE file. + +// This file is an internal atomic implementation, use butil/atomicops.h instead. +// RISC-V architecture specific atomic operations implementation using GCC intrinsics. + +#ifndef BUTIL_ATOMICOPS_INTERNALS_RISCV_GCC_H_ +#define BUTIL_ATOMICOPS_INTERNALS_RISCV_GCC_H_ + +namespace butil { +namespace subtle { + +inline void MemoryBarrier() { + __asm__ __volatile__ ("fence" ::: "memory"); // NOLINT +} + +// RISC-V atomic operations using GCC built-in functions +// These are implemented using the standard GCC atomic built-ins which +// are supported on RISC-V since GCC 7.1+ + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) + return old_value; + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!__sync_bool_compare_and_swap(ptr, old_value, new_value)); + return old_value; +} + +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + return Barrier_AtomicIncrement(ptr, increment); +} + +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + for (;;) { + // Atomic exchange the old value with an incremented one. + Atomic32 old_value = *ptr; + Atomic32 new_value = old_value + increment; + if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) { + // The exchange took place as expected. + return new_value; + } + // Otherwise, *ptr changed mid-loop and we need to retry. + } +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + // Since NoBarrier_CompareAndSwap uses __sync_bool_compare_and_swap, which + // is a full memory barrier, none is needed here or below in Release. + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit versions of the operations. +// See the 32-bit versions for comments. + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) + return old_value; + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!__sync_bool_compare_and_swap(ptr, old_value, new_value)); + return old_value; +} + +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + return Barrier_AtomicIncrement(ptr, increment); +} + +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + for (;;) { + // Atomic exchange the old value with an incremented one. + Atomic64 old_value = *ptr; + Atomic64 new_value = old_value + increment; + if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) { + // The exchange took place as expected. + return new_value; + } + // Otherwise, *ptr changed mid-loop and we need to retry. + } +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return *ptr; +} + +} // namespace butil::subtle +} // namespace butil + +#endif // BUTIL_ATOMICOPS_INTERNALS_RISCV_GCC_H_ diff --git a/src/butil/build_config.h b/src/butil/build_config.h index 5ddf38215e..18d449b44b 100644 --- a/src/butil/build_config.h +++ b/src/butil/build_config.h @@ -138,6 +138,16 @@ #define ARCH_CPU_LOONGARCH64 1 #define ARCH_CPU_64_BITS 1 #define ARCH_CPU_LITTLE_ENDIAN 1 +#elif defined(__riscv) +#define ARCH_CPU_RISCV_FAMILY 1 +#if defined(__riscv_xlen) && (__riscv_xlen == 64) +#define ARCH_CPU_RISCV64 1 +#define ARCH_CPU_64_BITS 1 +#else +#define ARCH_CPU_RISCV32 1 +#define ARCH_CPU_32_BITS 1 +#endif +#define ARCH_CPU_LITTLE_ENDIAN 1 #else #error Please add support for your architecture in butil/build_config.h #endif diff --git a/src/butil/time.h b/src/butil/time.h index 8b85699840..c57000ea99 100644 --- a/src/butil/time.h +++ b/src/butil/time.h @@ -254,6 +254,13 @@ inline uint64_t clock_cycles() { : "=r" (stable_counter), "=r" (counter_id) ); return stable_counter; +#elif defined(__riscv) + uint64_t cycles; + __asm__ __volatile__ ( + "rdcycle %0" + : "=r" (cycles) + ); + return cycles; #else #error "unsupported arch" #endif