diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index f4acf9c2e90f..382818a7197a 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -41,8 +41,8 @@ Support Architectures ~~~~~~~~~~~~~ -Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and -xtensa, and the tag-based KASAN modes are supported only on arm64. +Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, xtensa, +and loongarch, and the tag-based KASAN modes are supported only on arm64. Compilers ~~~~~~~~~ diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt index bf0124fae643..c4581c2edb28 100644 --- a/Documentation/features/debug/KASAN/arch-support.txt +++ b/Documentation/features/debug/KASAN/arch-support.txt @@ -13,7 +13,7 @@ | csky: | TODO | | hexagon: | TODO | | ia64: | TODO | - | loongarch: | TODO | + | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | | mips: | TODO | diff --git a/Documentation/features/debug/kcov/arch-support.txt b/Documentation/features/debug/kcov/arch-support.txt index ffcc9f2b1d74..de84cefbcdd3 100644 --- a/Documentation/features/debug/kcov/arch-support.txt +++ b/Documentation/features/debug/kcov/arch-support.txt @@ -13,7 +13,7 @@ | csky: | TODO | | hexagon: | TODO | | ia64: | TODO | - | loongarch: | TODO | + | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | | mips: | ok | diff --git a/Documentation/features/debug/kgdb/arch-support.txt b/Documentation/features/debug/kgdb/arch-support.txt index 958498f9f2a4..5e91ec78c80b 100644 --- a/Documentation/features/debug/kgdb/arch-support.txt +++ b/Documentation/features/debug/kgdb/arch-support.txt @@ -13,7 +13,7 @@ | csky: | TODO | | hexagon: | ok | | ia64: | TODO | - | loongarch: | TODO | + | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | | mips: | ok | diff --git a/Documentation/translations/zh_CN/dev-tools/kasan.rst b/Documentation/translations/zh_CN/dev-tools/kasan.rst index 05ef904dbcfb..8fdb20c9665b 100644 --- a/Documentation/translations/zh_CN/dev-tools/kasan.rst +++ b/Documentation/translations/zh_CN/dev-tools/kasan.rst @@ -42,7 +42,7 @@ KASAN有三种模式: 体系架构 ~~~~~~~~ -在x86_64、arm、arm64、powerpc、riscv、s390和xtensa上支持通用KASAN, +在x86_64、arm、arm64、powerpc、riscv、s390、xtensa和loongarch上支持通用KASAN, 而基于标签的KASAN模式只在arm64上支持。 编译器 diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index ecf282dee513..e14396a2ddcb 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -8,11 +8,13 @@ config LOONGARCH select ACPI_PPTT if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_BINFMT_ELF_STATE + select ARCH_DISABLE_KASAN_INLINE select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE + select ARCH_HAS_KCOV select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL @@ -91,6 +93,9 @@ config LOONGARCH select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE + select HAVE_ARCH_KASAN + select HAVE_ARCH_KFENCE + select HAVE_ARCH_KGDB if PERF_EVENTS select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK @@ -115,6 +120,7 @@ config LOONGARCH select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER + select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IOREMAP_PROT @@ -254,6 +260,9 @@ config AS_HAS_LSX_EXTENSION config AS_HAS_LASX_EXTENSION def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0) +config AS_HAS_LBT_EXTENSION + def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0) + menu "Kernel type and options" source "kernel/Kconfig.hz" @@ -534,6 +543,18 @@ config CPU_HAS_LASX If unsure, say Y. +config CPU_HAS_LBT + bool "Support for the Loongson Binary Translation Extension" + depends on AS_HAS_LBT_EXTENSION + help + Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0 + to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop). + Enabling this option allows the kernel to allocate and switch registers + specific to LBT. + + If you want to use this feature, such as the Loongson Architecture + Translator (LAT), say Y. + config CPU_HAS_PREFETCH bool default y @@ -638,6 +659,11 @@ config ARCH_MMAP_RND_BITS_MAX config ARCH_SUPPORTS_UPROBES def_bool y +config KASAN_SHADOW_OFFSET + hex + default 0x0 + depends on KASAN + menu "Power management options" config ARCH_SUSPEND_POSSIBLE diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index ef87bab46754..fb0fada43197 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -84,7 +84,10 @@ LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext endif cflags-y += $(call cc-option, -mno-check-zero-division) + +ifndef CONFIG_KASAN cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset +endif load-y = 0x9000000000200000 bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index d64849b4cba1..a3b52aaa83b3 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -30,7 +30,6 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y -CONFIG_SYSFS_DEPRECATED=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y @@ -47,8 +46,12 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y CONFIG_NR_CPUS=64 CONFIG_NUMA=y +CONFIG_CPU_HAS_FPU=y +CONFIG_CPU_HAS_LSX=y +CONFIG_CPU_HAS_LASX=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y +CONFIG_RANDOMIZE_BASE=y CONFIG_SUSPEND=y CONFIG_HIBERNATION=y CONFIG_ACPI=y @@ -63,6 +66,7 @@ CONFIG_EFI_ZBOOT=y CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y CONFIG_EFI_CAPSULE_LOADER=m CONFIG_EFI_TEST=m +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -108,7 +112,12 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=m CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y CONFIG_INET_ESP=m CONFIG_INET_UDP_DIAG=y CONFIG_TCP_CONG_ADVANCED=y @@ -137,7 +146,6 @@ CONFIG_NFT_MASQ=m CONFIG_NFT_REDIR=m CONFIG_NFT_NAT=m CONFIG_NFT_TUNNEL=m -CONFIG_NFT_OBJREF=m CONFIG_NFT_QUEUE=m CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m @@ -208,7 +216,11 @@ CONFIG_IP_VS=m CONFIG_IP_VS_IPV6=y CONFIG_IP_VS_PROTO_TCP=y CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_PROTO_SCTP=y CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m CONFIG_IP_VS_NFCT=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m @@ -227,7 +239,6 @@ CONFIG_IP_NF_TARGET_MASQUERADE=m CONFIG_IP_NF_TARGET_NETMAP=m CONFIG_IP_NF_TARGET_REDIRECT=m CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m @@ -363,6 +374,8 @@ CONFIG_MTD_CFI_AMDSTD=m CONFIG_MTD_CFI_STAA=m CONFIG_MTD_RAM=m CONFIG_MTD_ROM=m +CONFIG_MTD_UBI=m +CONFIG_MTD_UBI_BLOCK=y CONFIG_PARPORT=y CONFIG_PARPORT_PC=y CONFIG_PARPORT_SERIAL=y @@ -370,6 +383,7 @@ CONFIG_PARPORT_PC_FIFO=y CONFIG_ZRAM=m CONFIG_ZRAM_DEF_COMP_ZSTD=y CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 @@ -516,6 +530,8 @@ CONFIG_STMMAC_ETH=y # CONFIG_NET_VENDOR_TEHUTI is not set # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VIA is not set +CONFIG_NGBE=y +CONFIG_TXGBE=y # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set CONFIG_PPP=m @@ -602,9 +618,15 @@ CONFIG_HW_RANDOM_VIRTIO=m CONFIG_I2C_CHARDEV=y CONFIG_I2C_PIIX4=y CONFIG_I2C_GPIO=y +CONFIG_I2C_LS2X=y CONFIG_SPI=y +CONFIG_SPI_LOONGSON_PCI=m +CONFIG_SPI_LOONGSON_PLATFORM=m +CONFIG_PINCTRL=y +CONFIG_PINCTRL_LOONGSON2=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_LOONGSON=y +CONFIG_GPIO_LOONGSON_64BIT=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_RESTART=y CONFIG_POWER_RESET_SYSCON=y @@ -614,6 +636,7 @@ CONFIG_SENSORS_LM75=m CONFIG_SENSORS_LM93=m CONFIG_SENSORS_W83795=m CONFIG_SENSORS_W83627HF=m +CONFIG_LOONGSON2_THERMAL=m CONFIG_RC_CORE=m CONFIG_LIRC=y CONFIG_RC_DECODERS=y @@ -643,6 +666,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y CONFIG_DRM_AST=y CONFIG_DRM_QXL=m CONFIG_DRM_VIRTIO_GPU=m +CONFIG_DRM_LOONGSON=y CONFIG_FB=y CONFIG_FB_EFI=y CONFIG_FB_RADEON=y @@ -712,6 +736,7 @@ CONFIG_UCSI_ACPI=m CONFIG_INFINIBAND=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y +CONFIG_RTC_DRV_LOONGSON=y CONFIG_DMADEVICES=y CONFIG_UIO=m CONFIG_UIO_PDRV_GENIRQ=m @@ -745,7 +770,9 @@ CONFIG_COMEDI_NI_LABPC_PCI=m CONFIG_COMEDI_NI_PCIDIO=m CONFIG_COMEDI_NI_PCIMIO=m CONFIG_STAGING=y -CONFIG_R8188EU=m +CONFIG_COMMON_CLK_LOONGSON2=y +CONFIG_LOONGSON2_GUTS=y +CONFIG_LOONGSON2_PM=y CONFIG_PM_DEVFREQ=y CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y CONFIG_DEVFREQ_GOV_PERFORMANCE=y @@ -759,10 +786,17 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y +CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y @@ -771,11 +805,14 @@ CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=y CONFIG_OVERLAY_FS_INDEX=y CONFIG_OVERLAY_FS_XINO_AUTO=y CONFIG_OVERLAY_FS_METACOPY=y CONFIG_FSCACHE=y +CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -784,19 +821,42 @@ CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_FAT_DEFAULT_CODEPAGE=936 CONFIG_FAT_DEFAULT_IOCHARSET="gb2312" +CONFIG_EXFAT_FS=m +CONFIG_NTFS3_FS=m +CONFIG_NTFS3_64BIT_CLUSTER=y +CONFIG_NTFS3_LZX_XPRESS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=y +CONFIG_ORANGEFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_ECRYPT_FS_MESSAGING=y CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m +CONFIG_UBIFS_FS=m +CONFIG_UBIFS_FS_ADVANCED_COMPR=y CONFIG_CRAMFS=m CONFIG_SQUASHFS=y CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y +CONFIG_MINIX_FS=m +CONFIG_ROMFS_FS=m +CONFIG_PSTORE=m +CONFIG_PSTORE_LZO_COMPRESS=m +CONFIG_PSTORE_LZ4_COMPRESS=m +CONFIG_PSTORE_LZ4HC_COMPRESS=m +CONFIG_PSTORE_842_COMPRESS=y +CONFIG_PSTORE_ZSTD_COMPRESS=y +CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y +CONFIG_SYSV_FS=m +CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_ZIP_LZMA=y +CONFIG_EROFS_FS_PCPU_KTHREAD=y CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y @@ -807,6 +867,10 @@ CONFIG_NFSD=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_BLOCKLAYOUT=y +CONFIG_CEPH_FS=m +CONFIG_CEPH_FSCACHE=y +CONFIG_CEPH_FS_POSIX_ACL=y +CONFIG_CEPH_FS_SECURITY_LABEL=y CONFIG_CIFS=m # CONFIG_CIFS_DEBUG is not set CONFIG_9P_FS=y @@ -814,6 +878,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_936=y CONFIG_NLS_ASCII=y CONFIG_NLS_UTF8=y +CONFIG_DLM=m CONFIG_KEY_DH_OPERATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_SELINUX=y @@ -847,6 +912,7 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_CRC32_LOONGARCH=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_PRINTK_TIME=y CONFIG_STRIP_ASM_SYMS=y diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h index ed06d3997420..cf8e1a4e7c19 100644 --- a/arch/loongarch/include/asm/asm-prototypes.h +++ b/arch/loongarch/include/asm/asm-prototypes.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include #include #include #include diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index 79e1d53fea89..c9544f358c33 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -10,113 +10,6 @@ #include #include - .macro parse_v var val - \var = \val - .endm - - .macro parse_r var r - \var = -1 - .ifc \r, $r0 - \var = 0 - .endif - .ifc \r, $r1 - \var = 1 - .endif - .ifc \r, $r2 - \var = 2 - .endif - .ifc \r, $r3 - \var = 3 - .endif - .ifc \r, $r4 - \var = 4 - .endif - .ifc \r, $r5 - \var = 5 - .endif - .ifc \r, $r6 - \var = 6 - .endif - .ifc \r, $r7 - \var = 7 - .endif - .ifc \r, $r8 - \var = 8 - .endif - .ifc \r, $r9 - \var = 9 - .endif - .ifc \r, $r10 - \var = 10 - .endif - .ifc \r, $r11 - \var = 11 - .endif - .ifc \r, $r12 - \var = 12 - .endif - .ifc \r, $r13 - \var = 13 - .endif - .ifc \r, $r14 - \var = 14 - .endif - .ifc \r, $r15 - \var = 15 - .endif - .ifc \r, $r16 - \var = 16 - .endif - .ifc \r, $r17 - \var = 17 - .endif - .ifc \r, $r18 - \var = 18 - .endif - .ifc \r, $r19 - \var = 19 - .endif - .ifc \r, $r20 - \var = 20 - .endif - .ifc \r, $r21 - \var = 21 - .endif - .ifc \r, $r22 - \var = 22 - .endif - .ifc \r, $r23 - \var = 23 - .endif - .ifc \r, $r24 - \var = 24 - .endif - .ifc \r, $r25 - \var = 25 - .endif - .ifc \r, $r26 - \var = 26 - .endif - .ifc \r, $r27 - \var = 27 - .endif - .ifc \r, $r28 - \var = 28 - .endif - .ifc \r, $r29 - \var = 29 - .endif - .ifc \r, $r30 - \var = 30 - .endif - .ifc \r, $r31 - \var = 31 - .endif - .iflt \var - .error "Unable to parse register name \r" - .endif - .endm - .macro cpu_save_nonscratch thread stptr.d s0, \thread, THREAD_REG23 stptr.d s1, \thread, THREAD_REG24 @@ -148,12 +41,51 @@ .macro fpu_save_csr thread tmp movfcsr2gr \tmp, fcsr0 - stptr.w \tmp, \thread, THREAD_FCSR + stptr.w \tmp, \thread, THREAD_FCSR +#ifdef CONFIG_CPU_HAS_LBT + /* TM bit is always 0 if LBT not supported */ + andi \tmp, \tmp, FPU_CSR_TM + beqz \tmp, 1f + /* Save FTOP */ + x86mftop \tmp + stptr.w \tmp, \thread, THREAD_FTOP + /* Turn off TM to ensure the order of FPR in memory independent of TM */ + x86clrtm +1: +#endif .endm - .macro fpu_restore_csr thread tmp - ldptr.w \tmp, \thread, THREAD_FCSR - movgr2fcsr fcsr0, \tmp + .macro fpu_restore_csr thread tmp0 tmp1 + ldptr.w \tmp0, \thread, THREAD_FCSR + movgr2fcsr fcsr0, \tmp0 +#ifdef CONFIG_CPU_HAS_LBT + /* TM bit is always 0 if LBT not supported */ + andi \tmp0, \tmp0, FPU_CSR_TM + beqz \tmp0, 2f + /* Restore FTOP */ + ldptr.w \tmp0, \thread, THREAD_FTOP + andi \tmp0, \tmp0, 0x7 + la.pcrel \tmp1, 1f + alsl.d \tmp1, \tmp0, \tmp1, 3 + jr \tmp1 +1: + x86mttop 0 + b 2f + x86mttop 1 + b 2f + x86mttop 2 + b 2f + x86mttop 3 + b 2f + x86mttop 4 + b 2f + x86mttop 5 + b 2f + x86mttop 6 + b 2f + x86mttop 7 +2: +#endif .endm .macro fpu_save_cc thread tmp0 tmp1 @@ -353,7 +285,7 @@ .macro lsx_restore_all thread tmp0 tmp1 lsx_restore_data \thread, \tmp0 fpu_restore_cc \thread, \tmp0, \tmp1 - fpu_restore_csr \thread, \tmp0 + fpu_restore_csr \thread, \tmp0, \tmp1 .endm .macro lsx_save_upper vd base tmp off @@ -563,7 +495,7 @@ .macro lasx_restore_all thread tmp0 tmp1 lasx_restore_data \thread, \tmp0 fpu_restore_cc \thread, \tmp0, \tmp1 - fpu_restore_csr \thread, \tmp0 + fpu_restore_csr \thread, \tmp0, \tmp1 .endm .macro lasx_save_upper xd base tmp off diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h new file mode 100644 index 000000000000..deeff8158f45 --- /dev/null +++ b/arch/loongarch/include/asm/kasan.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +#define __HAVE_ARCH_SHADOW_MAP + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) + +#define XRANGE_SHIFT (48) + +/* Valid address length */ +#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +/* Used for taking out the valid address */ +#define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0) +/* One segment whole address space size */ +#define XRANGE_SIZE (XRANGE_SHADOW_MASK + 1) + +/* 64-bit segment value. */ +#define XKPRANGE_UC_SEG (0x8000) +#define XKPRANGE_CC_SEG (0x9000) +#define XKVRANGE_VC_SEG (0xffff) + +/* Cached */ +#define XKPRANGE_CC_START CACHE_BASE +#define XKPRANGE_CC_SIZE XRANGE_SIZE +#define XKPRANGE_CC_KASAN_OFFSET (0) +#define XKPRANGE_CC_SHADOW_SIZE (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKPRANGE_CC_SHADOW_END (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE) + +/* UnCached */ +#define XKPRANGE_UC_START UNCACHE_BASE +#define XKPRANGE_UC_SIZE XRANGE_SIZE +#define XKPRANGE_UC_KASAN_OFFSET XKPRANGE_CC_SHADOW_END +#define XKPRANGE_UC_SHADOW_SIZE (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKPRANGE_UC_SHADOW_END (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE) + +/* VMALLOC (Cached or UnCached) */ +#define XKVRANGE_VC_START MODULES_VADDR +#define XKVRANGE_VC_SIZE round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE) +#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END +#define XKVRANGE_VC_SHADOW_SIZE (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKVRANGE_VC_SHADOW_END (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE) + +/* KAsan shadow memory start right after vmalloc. */ +#define KASAN_SHADOW_START round_up(KFENCE_AREA_END, PGDIR_SIZE) +#define KASAN_SHADOW_SIZE (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET) +#define KASAN_SHADOW_END round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE) + +#define XKPRANGE_CC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET) +#define XKPRANGE_UC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET) +#define XKVRANGE_VC_SHADOW_OFFSET (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET) + +extern bool kasan_early_stage; +extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; + +#define kasan_arch_is_ready kasan_arch_is_ready +static __always_inline bool kasan_arch_is_ready(void) +{ + return !kasan_early_stage; +} + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + if (!kasan_arch_is_ready()) { + return (void *)(kasan_early_shadow_page); + } else { + unsigned long maddr = (unsigned long)addr; + unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; + unsigned long offset = 0; + + maddr &= XRANGE_SHADOW_MASK; + switch (xrange) { + case XKPRANGE_CC_SEG: + offset = XKPRANGE_CC_SHADOW_OFFSET; + break; + case XKPRANGE_UC_SEG: + offset = XKPRANGE_UC_SHADOW_OFFSET; + break; + case XKVRANGE_VC_SEG: + offset = XKVRANGE_VC_SHADOW_OFFSET; + break; + default: + WARN_ON(1); + return NULL; + } + + return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); + } +} + +static inline const void *kasan_shadow_to_mem(const void *shadow_addr) +{ + unsigned long addr = (unsigned long)shadow_addr; + + if (unlikely(addr > KASAN_SHADOW_END) || + unlikely(addr < KASAN_SHADOW_START)) { + WARN_ON(1); + return NULL; + } + + if (addr >= XKVRANGE_VC_SHADOW_OFFSET) + return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); + else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); + else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START); + else { + WARN_ON(1); + return NULL; + } +} + +void kasan_init(void); +asmlinkage void kasan_early_init(void); + +#endif +#endif diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h new file mode 100644 index 000000000000..6c82aea1c993 --- /dev/null +++ b/arch/loongarch/include/asm/kfence.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KFENCE support for LoongArch. + * + * Author: Enze Li + * Copyright (C) 2022-2023 KylinSoft Corporation. + */ + +#ifndef _ASM_LOONGARCH_KFENCE_H +#define _ASM_LOONGARCH_KFENCE_H + +#include +#include +#include + +static inline bool arch_kfence_init_pool(void) +{ + int err; + char *kfence_pool = __kfence_pool; + struct vm_struct *area; + + area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP, + KFENCE_AREA_START, KFENCE_AREA_END, + __builtin_return_address(0)); + if (!area) + return false; + + __kfence_pool = (char *)area->addr; + err = ioremap_page_range((unsigned long)__kfence_pool, + (unsigned long)__kfence_pool + KFENCE_POOL_SIZE, + virt_to_phys((void *)kfence_pool), PAGE_KERNEL); + if (err) { + free_vm_area(area); + __kfence_pool = kfence_pool; + return false; + } + + return true; +} + +/* Protect the given page and flush TLB. */ +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + pte_t *pte = virt_to_kpte(addr); + + if (WARN_ON(!pte) || pte_none(*pte)) + return false; + + if (protect) + set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT))); + else + set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT))); + + preempt_disable(); + local_flush_tlb_one(addr); + preempt_enable(); + + return true; +} + +#endif /* _ASM_LOONGARCH_KFENCE_H */ diff --git a/arch/loongarch/include/asm/kgdb.h b/arch/loongarch/include/asm/kgdb.h new file mode 100644 index 000000000000..2041ae58b161 --- /dev/null +++ b/arch/loongarch/include/asm/kgdb.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_LOONGARCH_KGDB_H +#define _ASM_LOONGARCH_KGDB_H + +#define GDB_SIZEOF_REG sizeof(u64) + +/* gdb remote procotol expects the following register layout. */ + +/* + * General purpose registers: + * r0-r31: 64 bit + * orig_a0: 64 bit + * pc : 64 bit + * csr_badvaddr: 64 bit + */ +#define DBG_PT_REGS_BASE 0 +#define DBG_PT_REGS_NUM 35 +#define DBG_PT_REGS_END (DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1) + +/* + * Floating point registers: + * f0-f31: 64 bit + */ +#define DBG_FPR_BASE (DBG_PT_REGS_END + 1) +#define DBG_FPR_NUM 32 +#define DBG_FPR_END (DBG_FPR_BASE + DBG_FPR_NUM - 1) + +/* + * Condition Flag registers: + * fcc0-fcc8: 8 bit + */ +#define DBG_FCC_BASE (DBG_FPR_END + 1) +#define DBG_FCC_NUM 8 +#define DBG_FCC_END (DBG_FCC_BASE + DBG_FCC_NUM - 1) + +/* + * Floating-point Control and Status registers: + * fcsr: 32 bit + */ +#define DBG_FCSR_NUM 1 +#define DBG_FCSR (DBG_FCC_END + 1) + +#define DBG_MAX_REG_NUM (DBG_FCSR + 1) + +/* + * Size of I/O buffer for gdb packet. + * considering to hold all register contents, size is set + */ +#define BUFMAX 2048 + +/* + * Number of bytes required for gdb_regs buffer. + * PT_REGS and FPR: 8 bytes; FCSR: 4 bytes; FCC: 1 bytes. + * GDB fails to connect for size beyond this with error + * "'g' packet reply is too long" + */ +#define NUMREGBYTES ((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4) + +#define BREAK_INSTR_SIZE 4 +#define CACHE_FLUSH_IS_SAFE 0 + +/* Register numbers of various important registers. */ +enum dbg_loongarch_regnum { + DBG_LOONGARCH_ZERO = 0, + DBG_LOONGARCH_RA, + DBG_LOONGARCH_TP, + DBG_LOONGARCH_SP, + DBG_LOONGARCH_A0, + DBG_LOONGARCH_FP = 22, + DBG_LOONGARCH_S0, + DBG_LOONGARCH_S1, + DBG_LOONGARCH_S2, + DBG_LOONGARCH_S3, + DBG_LOONGARCH_S4, + DBG_LOONGARCH_S5, + DBG_LOONGARCH_S6, + DBG_LOONGARCH_S7, + DBG_LOONGARCH_S8, + DBG_LOONGARCH_ORIG_A0, + DBG_LOONGARCH_PC, + DBG_LOONGARCH_BADV +}; + +void kgdb_breakinst(void); +void arch_kgdb_breakpoint(void); + +#ifdef CONFIG_KGDB +bool kgdb_breakpoint_handler(struct pt_regs *regs); +#else /* !CONFIG_KGDB */ +static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; } +#endif /* CONFIG_KGDB */ + +#endif /* __ASM_KGDB_H_ */ diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h new file mode 100644 index 000000000000..e671978bf552 --- /dev/null +++ b/arch/loongarch/include/asm/lbt.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Qi Hu + * Huacai Chen + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited + */ +#ifndef _ASM_LBT_H +#define _ASM_LBT_H + +#include +#include +#include +#include + +extern void _init_lbt(void); +extern void _save_lbt(struct loongarch_lbt *); +extern void _restore_lbt(struct loongarch_lbt *); + +static inline int is_lbt_enabled(void) +{ + if (!cpu_has_lbt) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN) ? + 1 : 0; +} + +static inline int is_lbt_owner(void) +{ + return test_thread_flag(TIF_USEDLBT); +} + +#ifdef CONFIG_CPU_HAS_LBT + +static inline void enable_lbt(void) +{ + if (cpu_has_lbt) + csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lbt(void) +{ + if (cpu_has_lbt) + csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN); +} + +static inline void __own_lbt(void) +{ + enable_lbt(); + set_thread_flag(TIF_USEDLBT); + KSTK_EUEN(current) |= CSR_EUEN_LBTEN; +} + +static inline void own_lbt_inatomic(int restore) +{ + if (cpu_has_lbt && !is_lbt_owner()) { + __own_lbt(); + if (restore) + _restore_lbt(¤t->thread.lbt); + } +} + +static inline void own_lbt(int restore) +{ + preempt_disable(); + own_lbt_inatomic(restore); + preempt_enable(); +} + +static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) +{ + if (cpu_has_lbt && is_lbt_owner()) { + if (save) + _save_lbt(&tsk->thread.lbt); + + disable_lbt(); + clear_tsk_thread_flag(tsk, TIF_USEDLBT); + } + KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN); +} + +static inline void lose_lbt(int save) +{ + preempt_disable(); + lose_lbt_inatomic(save, current); + preempt_enable(); +} + +static inline void init_lbt(void) +{ + __own_lbt(); + _init_lbt(); +} +#else +static inline void own_lbt_inatomic(int restore) {} +static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {} +static inline void init_lbt(void) {} +static inline void lose_lbt(int save) {} +#endif + +static inline int thread_lbt_context_live(void) +{ + if (!cpu_has_lbt) + return 0; + + return test_thread_flag(TIF_LBT_CTX_LIVE); +} + +#endif /* _ASM_LBT_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 10748a20a2ab..33531d432b49 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -12,49 +12,6 @@ #ifndef __ASSEMBLY__ #include -/* - * parse_r var, r - Helper assembler macro for parsing register names. - * - * This converts the register name in $n form provided in \r to the - * corresponding register number, which is assigned to the variable \var. It is - * needed to allow explicit encoding of instructions in inline assembly where - * registers are chosen by the compiler in $n form, allowing us to avoid using - * fixed register numbers. - * - * It also allows newer instructions (not implemented by the assembler) to be - * transparently implemented using assembler macros, instead of needing separate - * cases depending on toolchain support. - * - * Simple usage example: - * __asm__ __volatile__("parse_r addr, %0\n\t" - * "#invtlb op, 0, %0\n\t" - * ".word ((0x6498000) | (addr << 10) | (0 << 5) | op)" - * : "=r" (status); - */ - -/* Match an individual register number and assign to \var */ -#define _IFC_REG(n) \ - ".ifc \\r, $r" #n "\n\t" \ - "\\var = " #n "\n\t" \ - ".endif\n\t" - -__asm__(".macro parse_r var r\n\t" - "\\var = -1\n\t" - _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) - _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) - _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) - _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) - _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) - _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) - _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) - _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) - ".iflt \\var\n\t" - ".error \"Unable to parse register name \\r\"\n\t" - ".endif\n\t" - ".endm"); - -#undef _IFC_REG - /* CPUCFG */ #define read_cpucfg(reg) __cpucfg(reg) @@ -1453,6 +1410,10 @@ __BUILD_CSR_OP(tlbidx) #define FPU_CSR_RU 0x200 /* towards +Infinity */ #define FPU_CSR_RD 0x300 /* towards -Infinity */ +/* Bit 6 of FPU Status Register specify the LBT TOP simulation mode */ +#define FPU_CSR_TM_SHIFT 0x6 +#define FPU_CSR_TM (_ULCAST_(1) << FPU_CSR_TM_SHIFT) + #define read_fcsr(source) \ ({ \ unsigned int __res; \ diff --git a/arch/loongarch/include/asm/mmzone.h b/arch/loongarch/include/asm/mmzone.h index fe67d0b4b33d..2b9a90727e19 100644 --- a/arch/loongarch/include/asm/mmzone.h +++ b/arch/loongarch/include/asm/mmzone.h @@ -13,6 +13,4 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[(nid)]) -extern void setup_zero_pages(void); - #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 26e8dccb6619..63f137ce82a4 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -84,7 +84,12 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) #define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) -#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) + +#define virt_to_page(kaddr) \ +({ \ + (likely((unsigned long)kaddr < vm_map_base)) ? \ + dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\ +}) extern int __virt_addr_valid(volatile void *kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr)) diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 23f5b1107246..79470f0b4f1d 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -94,4 +94,5 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) #endif /* __PAGETABLE_PUD_FOLDED */ +extern pte_t * __init populate_kernel_pte(unsigned long addr); #endif /* _ASM_PGALLOC_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 06963a172319..29d9b12298bc 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -70,12 +70,9 @@ struct vm_area_struct; * for zero-mapped memory areas etc.. */ -extern unsigned long empty_zero_page; -extern unsigned long zero_page_mask; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) \ - (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask)))) -#define __HAVE_COLOR_ZERO_PAGE +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) /* * TLB refill handlers may also map the vmalloc area into xkvrange. @@ -85,14 +82,30 @@ extern unsigned long zero_page_mask; #define MODULES_VADDR (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE)) #define MODULES_END (MODULES_VADDR + SZ_256M) +#ifdef CONFIG_KFENCE +#define KFENCE_AREA_SIZE (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE) +#else +#define KFENCE_AREA_SIZE 0 +#endif + #define VMALLOC_START MODULES_END + +#ifndef CONFIG_KASAN #define VMALLOC_END \ (vm_map_base + \ - min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE) + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE) +#else +#define VMALLOC_END \ + (vm_map_base + \ + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE) +#endif #define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK)) #define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1) +#define KFENCE_AREA_START (VMEMMAP_END + 1) +#define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1) + #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) #ifndef __PAGETABLE_PMD_FOLDED @@ -350,6 +363,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt extern pgd_t swapper_pg_dir[]; extern pgd_t invalid_pg_dir[]; +struct page *dmw_virt_to_page(unsigned long kaddr); +struct page *tlb_virt_to_page(unsigned long kaddr); + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. @@ -596,6 +612,9 @@ static inline long pmd_protnone(pmd_t pmd) } #endif /* CONFIG_NUMA_BALANCING */ +#define pmd_leaf(pmd) ((pmd_val(pmd) & _PAGE_HUGE) != 0) +#define pud_leaf(pud) ((pud_val(pud) & _PAGE_HUGE) != 0) + /* * We provide our own get_unmapped area to cope with the virtual aliasing * constraints placed on us by the cache architecture. diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 636e1c66398c..c3bc44b5f5b3 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -80,11 +80,22 @@ BUILD_FPR_ACCESS(32) BUILD_FPR_ACCESS(64) struct loongarch_fpu { - unsigned int fcsr; uint64_t fcc; /* 8x8 */ + uint32_t fcsr; + uint32_t ftop; union fpureg fpr[NUM_FPU_REGS]; }; +struct loongarch_lbt { + /* Scratch registers */ + unsigned long scr0; + unsigned long scr1; + unsigned long scr2; + unsigned long scr3; + /* Eflags register */ + unsigned long eflags; +}; + #define INIT_CPUMASK { \ {0,} \ } @@ -113,15 +124,6 @@ struct thread_struct { unsigned long csr_ecfg; unsigned long csr_badvaddr; /* Last user fault */ - /* Scratch registers */ - unsigned long scr0; - unsigned long scr1; - unsigned long scr2; - unsigned long scr3; - - /* Eflags register */ - unsigned long eflags; - /* Other stuff associated with the thread. */ unsigned long trap_nr; unsigned long error_code; @@ -133,6 +135,7 @@ struct thread_struct { * context because they are conditionally copied at fork(). */ struct loongarch_fpu fpu FPU_ALIGN; + struct loongarch_lbt lbt; /* Also conditionally copied */ /* Hardware breakpoints pinned to this task. */ struct perf_event *hbp_break[LOONGARCH_MAX_BRP]; @@ -174,8 +177,9 @@ struct thread_struct { * FPU & vector registers \ */ \ .fpu = { \ - .fcsr = 0, \ .fcc = 0, \ + .fcsr = 0, \ + .ftop = 0, \ .fpr = {{{0,},},}, \ }, \ .hbp_break = {0}, \ diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index be05c0e706a2..a0bc159ce8bd 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -7,6 +7,7 @@ #define _LOONGARCH_SETUP_H #include +#include #include #define VECSIZE 0x200 @@ -33,8 +34,13 @@ extern long __la_abs_end; extern long __rela_dyn_begin; extern long __rela_dyn_end; -extern void * __init relocate_kernel(void); +extern unsigned long __init relocate_kernel(void); #endif +static inline unsigned long kaslr_offset(void) +{ + return (unsigned long)&_text - VMLINUX_LOAD_ADDRESS; +} + #endif /* __SETUP_H */ diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 7df80e6ae9d2..4fb1e6408b98 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -158,6 +158,10 @@ cfi_st u0, PT_R21, \docfi csrrd u0, PERCPU_BASE_KS 9: +#ifdef CONFIG_KGDB + li.w t0, CSR_CRMD_WE + csrxchg t0, t0, LOONGARCH_CSR_CRMD +#endif .endm .macro SAVE_ALL docfi=0 diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h index 7b29cc9c70aa..5bb5a90d2681 100644 --- a/arch/loongarch/include/asm/string.h +++ b/arch/loongarch/include/asm/string.h @@ -7,11 +7,31 @@ #define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); +extern void *__memset(void *__s, int __c, size_t __count); #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *__to, __const__ void *__from, size_t __n); +extern void *__memcpy(void *__to, __const__ void *__from, size_t __n); #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *__dest, __const__ void *__src, size_t __n); +extern void *__memmove(void *__dest, __const__ void *__src, size_t __n); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memset(s, c, n) __memset(s, c, n) +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + +#endif #endif /* _ASM_STRING_H */ diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h index 24e3094bebab..5b225aff3ba2 100644 --- a/arch/loongarch/include/asm/switch_to.h +++ b/arch/loongarch/include/asm/switch_to.h @@ -7,6 +7,7 @@ #include #include +#include struct task_struct; @@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev, #define switch_to(prev, next, last) \ do { \ lose_fpu_inatomic(1, prev); \ + lose_lbt_inatomic(1, prev); \ hw_breakpoint_thread_switch(next); \ (last) = __switch_to(prev, next, task_thread_info(next), \ __builtin_return_address(0), __builtin_frame_address(0)); \ diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 1a3354ca056e..8cb653d49a54 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -84,6 +84,8 @@ register unsigned long current_stack_pointer __asm__("$sp"); #define TIF_SINGLESTEP 16 /* Single Step */ #define TIF_LSX_CTX_LIVE 17 /* LSX context must be preserved */ #define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */ +#define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */ +#define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */ #define _TIF_SIGPENDING (1< + */ +#ifndef _ASM_LOONGARCH_XOR_H +#define _ASM_LOONGARCH_XOR_H + +#include +#include + +#ifdef CONFIG_CPU_HAS_LSX +static struct xor_block_template xor_block_lsx = { + .name = "lsx", + .do_2 = xor_lsx_2, + .do_3 = xor_lsx_3, + .do_4 = xor_lsx_4, + .do_5 = xor_lsx_5, +}; + +#define XOR_SPEED_LSX() \ + do { \ + if (cpu_has_lsx) \ + xor_speed(&xor_block_lsx); \ + } while (0) +#else /* CONFIG_CPU_HAS_LSX */ +#define XOR_SPEED_LSX() +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +static struct xor_block_template xor_block_lasx = { + .name = "lasx", + .do_2 = xor_lasx_2, + .do_3 = xor_lasx_3, + .do_4 = xor_lasx_4, + .do_5 = xor_lasx_5, +}; + +#define XOR_SPEED_LASX() \ + do { \ + if (cpu_has_lasx) \ + xor_speed(&xor_block_lasx); \ + } while (0) +#else /* CONFIG_CPU_HAS_LASX */ +#define XOR_SPEED_LASX() +#endif /* CONFIG_CPU_HAS_LASX */ + +/* + * For grins, also test the generic routines. + * + * More importantly: it cannot be ruled out at this point of time, that some + * future (maybe reduced) models could run the vector algorithms slower than + * the scalar ones, maybe for errata or micro-op reasons. It may be + * appropriate to revisit this after one or two more uarch generations. + */ +#include + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_32regs_p); \ + XOR_SPEED_LSX(); \ + XOR_SPEED_LASX(); \ +} while (0) + +#endif /* _ASM_LOONGARCH_XOR_H */ diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h new file mode 100644 index 000000000000..471b96332f38 --- /dev/null +++ b/arch/loongarch/include/asm/xor_simd.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 WANG Xuerui + */ +#ifndef _ASM_LOONGARCH_XOR_SIMD_H +#define _ASM_LOONGARCH_XOR_SIMD_H + +#ifdef CONFIG_CPU_HAS_LSX +void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3); +void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4, const unsigned long * __restrict p5); +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3); +void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4, const unsigned long * __restrict p5); +#endif /* CONFIG_CPU_HAS_LASX */ + +#endif /* _ASM_LOONGARCH_XOR_SIMD_H */ diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index 06e3be52cb04..ac915f841650 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -56,6 +56,12 @@ struct user_lasx_state { uint64_t vregs[32*4]; }; +struct user_lbt_state { + uint64_t scr[4]; + uint32_t eflags; + uint32_t ftop; +}; + struct user_watch_state { uint64_t dbg_info; struct { diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h index 4cd7d16f7037..6c22f616b8f1 100644 --- a/arch/loongarch/include/uapi/asm/sigcontext.h +++ b/arch/loongarch/include/uapi/asm/sigcontext.h @@ -59,4 +59,14 @@ struct lasx_context { __u32 fcsr; }; +/* LBT context */ +#define LBT_CTX_MAGIC 0x42540001 +#define LBT_CTX_ALIGN 8 +struct lbt_context { + __u64 regs[4]; + __u32 eflags; + __u32 ftop; +}; + + #endif /* _UAPI_ASM_SIGCONTEXT_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 8e279f04f9e7..c56ea0b75448 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -15,6 +15,8 @@ obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o +obj-$(CONFIG_CPU_HAS_LBT) += lbt.o + obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o ifdef CONFIG_FUNCTION_TRACER @@ -32,6 +34,12 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE) endif +KASAN_SANITIZE_efi.o := n +KASAN_SANITIZE_cpu-probe.o := n +KASAN_SANITIZE_traps.o := n +KASAN_SANITIZE_smp.o := n +KASAN_SANITIZE_vdso.o := n + obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -54,6 +62,7 @@ obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o +obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o obj-$(CONFIG_UPROBES) += uprobes.o diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 505e4bf59603..8da0726777ed 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -118,13 +118,6 @@ void output_thread_defines(void) OFFSET(THREAD_CSRECFG, task_struct, thread.csr_ecfg); - OFFSET(THREAD_SCR0, task_struct, thread.scr0); - OFFSET(THREAD_SCR1, task_struct, thread.scr1); - OFFSET(THREAD_SCR2, task_struct, thread.scr2); - OFFSET(THREAD_SCR3, task_struct, thread.scr3); - - OFFSET(THREAD_EFLAGS, task_struct, thread.eflags); - OFFSET(THREAD_FPU, task_struct, thread.fpu); OFFSET(THREAD_BVADDR, task_struct, \ @@ -172,6 +165,17 @@ void output_thread_fpu_defines(void) OFFSET(THREAD_FCSR, loongarch_fpu, fcsr); OFFSET(THREAD_FCC, loongarch_fpu, fcc); + OFFSET(THREAD_FTOP, loongarch_fpu, ftop); + BLANK(); +} + +void output_thread_lbt_defines(void) +{ + OFFSET(THREAD_SCR0, loongarch_lbt, scr0); + OFFSET(THREAD_SCR1, loongarch_lbt, scr1); + OFFSET(THREAD_SCR2, loongarch_lbt, scr2); + OFFSET(THREAD_SCR3, loongarch_lbt, scr3); + OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags); BLANK(); } diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index e925579c7a71..55320813ee08 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -144,6 +144,20 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_LVZ; elf_hwcap |= HWCAP_LOONGARCH_LVZ; } +#ifdef CONFIG_CPU_HAS_LBT + if (config & CPUCFG2_X86BT) { + c->options |= LOONGARCH_CPU_LBT_X86; + elf_hwcap |= HWCAP_LOONGARCH_LBT_X86; + } + if (config & CPUCFG2_ARMBT) { + c->options |= LOONGARCH_CPU_LBT_ARM; + elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM; + } + if (config & CPUCFG2_MIPSBT) { + c->options |= LOONGARCH_CPU_LBT_MIPS; + elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS; + } +#endif config = read_cpucfg(LOONGARCH_CPUCFG6); if (config & CPUCFG6_PMP) diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index d737e3cf42d3..65518bb8f472 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -58,6 +58,11 @@ SYM_FUNC_START(handle_syscall) SAVE_STATIC +#ifdef CONFIG_KGDB + li.w t1, CSR_CRMD_WE + csrxchg t1, t1, LOONGARCH_CSR_CRMD +#endif + move u0, t0 li.d tp, ~_THREAD_MASK and tp, tp, sp diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 501094a09f5d..d53ab10f4644 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -22,7 +22,7 @@ .macro EX insn, reg, src, offs .ex\@: \insn \reg, \src, \offs - _asm_extable .ex\@, fault + _asm_extable .ex\@, .L_fpu_fault .endm .macro sc_save_fp base @@ -138,6 +138,13 @@ .macro sc_save_fcsr base, tmp0 movfcsr2gr \tmp0, fcsr0 EX st.w \tmp0, \base, 0 +#if defined(CONFIG_CPU_HAS_LBT) + /* TM bit is always 0 if LBT not supported */ + andi \tmp0, \tmp0, FPU_CSR_TM + beqz \tmp0, 1f + x86clrtm +1: +#endif .endm .macro sc_restore_fcsr base, tmp0 @@ -309,7 +316,7 @@ EXPORT_SYMBOL(_save_fp) */ SYM_FUNC_START(_restore_fp) fpu_restore_double a0 t1 # clobbers t1 - fpu_restore_csr a0 t1 + fpu_restore_csr a0 t1 t2 fpu_restore_cc a0 t1 t2 # clobbers t1, t2 jr ra SYM_FUNC_END(_restore_fp) @@ -514,7 +521,6 @@ SYM_FUNC_START(_restore_lasx_context) jr ra SYM_FUNC_END(_restore_lasx_context) -SYM_FUNC_START(fault) +.L_fpu_fault: li.w a0, -EFAULT # failure jr ra -SYM_FUNC_END(fault) diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 5e828a8bc0a0..53b883db0786 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -95,12 +95,17 @@ SYM_CODE_START(kernel_entry) # kernel entry point PTR_LI sp, (_THREAD_SIZE - PT_SIZE) PTR_ADD sp, sp, tp set_saved_sp sp, t0, t1 -#endif - /* relocate_kernel() returns the new kernel entry point */ - jr a0 - ASM_BUG() + /* Jump to the new kernel: new_pc = current_pc + random_offset */ + pcaddi t0, 0 + add.d t0, t0, a0 + jirl zero, t0, 0xc +#endif /* CONFIG_RANDOMIZE_BASE */ +#endif /* CONFIG_RELOCATABLE */ + +#ifdef CONFIG_KASAN + bl kasan_early_init #endif bl start_kernel diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c index 5c46ae8c6cac..ec5b28e570c9 100644 --- a/arch/loongarch/kernel/kfpu.c +++ b/arch/loongarch/kernel/kfpu.c @@ -8,19 +8,40 @@ #include #include +static unsigned int euen_mask = CSR_EUEN_FPEN; + +/* + * The critical section between kernel_fpu_begin() and kernel_fpu_end() + * is non-reentrant. It is the caller's responsibility to avoid reentrance. + * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example. + */ static DEFINE_PER_CPU(bool, in_kernel_fpu); +static DEFINE_PER_CPU(unsigned int, euen_current); void kernel_fpu_begin(void) { + unsigned int *euen_curr; + preempt_disable(); WARN_ON(this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, true); + euen_curr = this_cpu_ptr(&euen_current); - if (!is_fpu_owner()) - enable_fpu(); + *euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN); + +#ifdef CONFIG_CPU_HAS_LASX + if (*euen_curr & CSR_EUEN_LASXEN) + _save_lasx(¤t->thread.fpu); else +#endif +#ifdef CONFIG_CPU_HAS_LSX + if (*euen_curr & CSR_EUEN_LSXEN) + _save_lsx(¤t->thread.fpu); + else +#endif + if (*euen_curr & CSR_EUEN_FPEN) _save_fp(¤t->thread.fpu); write_fcsr(LOONGARCH_FCSR0, 0); @@ -29,15 +50,41 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin); void kernel_fpu_end(void) { + unsigned int *euen_curr; + WARN_ON(!this_cpu_read(in_kernel_fpu)); - if (!is_fpu_owner()) - disable_fpu(); + euen_curr = this_cpu_ptr(&euen_current); + +#ifdef CONFIG_CPU_HAS_LASX + if (*euen_curr & CSR_EUEN_LASXEN) + _restore_lasx(¤t->thread.fpu); else +#endif +#ifdef CONFIG_CPU_HAS_LSX + if (*euen_curr & CSR_EUEN_LSXEN) + _restore_lsx(¤t->thread.fpu); + else +#endif + if (*euen_curr & CSR_EUEN_FPEN) _restore_fp(¤t->thread.fpu); + *euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN); + this_cpu_write(in_kernel_fpu, false); preempt_enable(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); + +static int __init init_euen_mask(void) +{ + if (cpu_has_lsx) + euen_mask |= CSR_EUEN_LSXEN; + + if (cpu_has_lasx) + euen_mask |= CSR_EUEN_LASXEN; + + return 0; +} +arch_initcall(init_euen_mask); diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c new file mode 100644 index 000000000000..445c452d72a7 --- /dev/null +++ b/arch/loongarch/kernel/kgdb.c @@ -0,0 +1,727 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * LoongArch KGDB support + * + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +int kgdb_watch_activated; +static unsigned int stepped_opcode; +static unsigned long stepped_address; + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) }, + { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) }, + { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) }, + { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) }, + { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) }, + { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) }, + { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) }, + { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) }, + { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) }, + { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) }, + { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) }, + { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) }, + { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) }, + { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) }, + { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) }, + { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) }, + { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) }, + { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) }, + { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) }, + { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) }, + { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) }, + { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) }, + { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) }, + { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) }, + { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) }, + { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) }, + { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) }, + { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) }, + { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) }, + { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) }, + { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) }, + { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) }, + { "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) }, + { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) }, + { "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) }, + { "f0", GDB_SIZEOF_REG, 0 }, + { "f1", GDB_SIZEOF_REG, 1 }, + { "f2", GDB_SIZEOF_REG, 2 }, + { "f3", GDB_SIZEOF_REG, 3 }, + { "f4", GDB_SIZEOF_REG, 4 }, + { "f5", GDB_SIZEOF_REG, 5 }, + { "f6", GDB_SIZEOF_REG, 6 }, + { "f7", GDB_SIZEOF_REG, 7 }, + { "f8", GDB_SIZEOF_REG, 8 }, + { "f9", GDB_SIZEOF_REG, 9 }, + { "f10", GDB_SIZEOF_REG, 10 }, + { "f11", GDB_SIZEOF_REG, 11 }, + { "f12", GDB_SIZEOF_REG, 12 }, + { "f13", GDB_SIZEOF_REG, 13 }, + { "f14", GDB_SIZEOF_REG, 14 }, + { "f15", GDB_SIZEOF_REG, 15 }, + { "f16", GDB_SIZEOF_REG, 16 }, + { "f17", GDB_SIZEOF_REG, 17 }, + { "f18", GDB_SIZEOF_REG, 18 }, + { "f19", GDB_SIZEOF_REG, 19 }, + { "f20", GDB_SIZEOF_REG, 20 }, + { "f21", GDB_SIZEOF_REG, 21 }, + { "f22", GDB_SIZEOF_REG, 22 }, + { "f23", GDB_SIZEOF_REG, 23 }, + { "f24", GDB_SIZEOF_REG, 24 }, + { "f25", GDB_SIZEOF_REG, 25 }, + { "f26", GDB_SIZEOF_REG, 26 }, + { "f27", GDB_SIZEOF_REG, 27 }, + { "f28", GDB_SIZEOF_REG, 28 }, + { "f29", GDB_SIZEOF_REG, 29 }, + { "f30", GDB_SIZEOF_REG, 30 }, + { "f31", GDB_SIZEOF_REG, 31 }, + { "fcc0", 1, 0 }, + { "fcc1", 1, 1 }, + { "fcc2", 1, 2 }, + { "fcc3", 1, 3 }, + { "fcc4", 1, 4 }, + { "fcc5", 1, 5 }, + { "fcc6", 1, 6 }, + { "fcc7", 1, 7 }, + { "fcsr", 4, 0 }, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + int reg_offset, reg_size; + + if (regno < 0 || regno >= DBG_MAX_REG_NUM) + return NULL; + + reg_offset = dbg_reg_def[regno].offset; + reg_size = dbg_reg_def[regno].size; + + if (reg_offset == -1) + goto out; + + /* Handle general-purpose/orig_a0/pc/badv registers */ + if (regno <= DBG_PT_REGS_END) { + memcpy(mem, (void *)regs + reg_offset, reg_size); + goto out; + } + + if (!(regs->csr_euen & CSR_EUEN_FPEN)) + goto out; + + save_fp(current); + + /* Handle FP registers */ + switch (regno) { + case DBG_FCSR: /* Process the fcsr */ + memcpy(mem, (void *)¤t->thread.fpu.fcsr, reg_size); + break; + case DBG_FCC_BASE ... DBG_FCC_END: /* Process the fcc */ + memcpy(mem, (void *)¤t->thread.fpu.fcc + reg_offset, reg_size); + break; + case DBG_FPR_BASE ... DBG_FPR_END: /* Process the fpr */ + memcpy(mem, (void *)¤t->thread.fpu.fpr[reg_offset], reg_size); + break; + default: + break; + } + +out: + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + int reg_offset, reg_size; + + if (regno < 0 || regno >= DBG_MAX_REG_NUM) + return -EINVAL; + + reg_offset = dbg_reg_def[regno].offset; + reg_size = dbg_reg_def[regno].size; + + if (reg_offset == -1) + return 0; + + /* Handle general-purpose/orig_a0/pc/badv registers */ + if (regno <= DBG_PT_REGS_END) { + memcpy((void *)regs + reg_offset, mem, reg_size); + return 0; + } + + if (!(regs->csr_euen & CSR_EUEN_FPEN)) + return 0; + + /* Handle FP registers */ + switch (regno) { + case DBG_FCSR: /* Process the fcsr */ + memcpy((void *)¤t->thread.fpu.fcsr, mem, reg_size); + break; + case DBG_FCC_BASE ... DBG_FCC_END: /* Process the fcc */ + memcpy((void *)¤t->thread.fpu.fcc + reg_offset, mem, reg_size); + break; + case DBG_FPR_BASE ... DBG_FPR_END: /* Process the fpr */ + memcpy((void *)¤t->thread.fpu.fpr[reg_offset], mem, reg_size); + break; + default: + break; + } + + restore_fp(current); + + return 0; +} + +/* + * Similar to regs_to_gdb_regs() except that process is sleeping and so + * we may not be able to get all the info. + */ +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + /* Initialize to zero */ + memset((char *)gdb_regs, 0, NUMREGBYTES); + + gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01; + gdb_regs[DBG_LOONGARCH_TP] = (long)p; + gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03; + + /* S0 - S8 */ + gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23; + gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24; + gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25; + gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26; + gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27; + gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28; + gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29; + gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30; + gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31; + + /* + * PC use return address (RA), i.e. the moment after return from __switch_to() + */ + gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01; +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->csr_era = pc; +} + +void arch_kgdb_breakpoint(void) +{ + __asm__ __volatile__ ( \ + ".globl kgdb_breakinst\n\t" \ + "nop\n" \ + "kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */ +} + +/* + * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, + * then try to fall into the debugger + */ +static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + + /* Userspace events, ignore. */ + if (user_mode(regs)) + return NOTIFY_DONE; + + if (!kgdb_io_module_registered) + return NOTIFY_DONE; + + if (atomic_read(&kgdb_active) != -1) + kgdb_nmicallback(smp_processor_id(), regs); + + if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs)) + return NOTIFY_DONE; + + if (atomic_read(&kgdb_setting_breakpoint)) + if (regs->csr_era == (unsigned long)&kgdb_breakinst) + regs->csr_era += LOONGARCH_INSN_SIZE; + + return NOTIFY_STOP; +} + +bool kgdb_breakpoint_handler(struct pt_regs *regs) +{ + struct die_args args = { + .regs = regs, + .str = "Break", + .err = BRK_KDB, + .trapnr = read_csr_excode(), + .signr = SIGTRAP, + + }; + + return (kgdb_loongarch_notify(NULL, DIE_TRAP, &args) == NOTIFY_STOP) ? true : false; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_loongarch_notify, +}; + +static inline void kgdb_arch_update_addr(struct pt_regs *regs, + char *remcom_in_buffer) +{ + unsigned long addr; + char *ptr; + + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + regs->csr_era = addr; +} + +/* Calculate the new address for after a step */ +static int get_step_address(struct pt_regs *regs, unsigned long *next_addr) +{ + char cj_val; + unsigned int si, si_l, si_h, rd, rj, cj; + unsigned long pc = instruction_pointer(regs); + union loongarch_instruction *ip = (union loongarch_instruction *)pc; + + if (pc & 3) { + pr_warn("%s: invalid pc 0x%lx\n", __func__, pc); + return -EINVAL; + } + + *next_addr = pc + LOONGARCH_INSN_SIZE; + + si_h = ip->reg0i26_format.immediate_h; + si_l = ip->reg0i26_format.immediate_l; + switch (ip->reg0i26_format.opcode) { + case b_op: + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27); + return 0; + case bl_op: + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27); + regs->regs[1] = pc + LOONGARCH_INSN_SIZE; + return 0; + } + + rj = ip->reg1i21_format.rj; + cj = (rj & 0x07) + DBG_FCC_BASE; + si_l = ip->reg1i21_format.immediate_l; + si_h = ip->reg1i21_format.immediate_h; + dbg_get_reg(cj, &cj_val, regs); + switch (ip->reg1i21_format.opcode) { + case beqz_op: + if (regs->regs[rj] == 0) + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); + return 0; + case bnez_op: + if (regs->regs[rj] != 0) + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); + return 0; + case bceqz_op: /* bceqz_op = bcnez_op */ + if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */ + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); + if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */ + *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22); + return 0; + } + + rj = ip->reg2i16_format.rj; + rd = ip->reg2i16_format.rd; + si = ip->reg2i16_format.immediate; + switch (ip->reg2i16_format.opcode) { + case beq_op: + if (regs->regs[rj] == regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case bne_op: + if (regs->regs[rj] != regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case blt_op: + if ((long)regs->regs[rj] < (long)regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case bge_op: + if ((long)regs->regs[rj] >= (long)regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case bltu_op: + if (regs->regs[rj] < regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case bgeu_op: + if (regs->regs[rj] >= regs->regs[rd]) + *next_addr = pc + sign_extend64(si << 2, 17); + return 0; + case jirl_op: + regs->regs[rd] = pc + LOONGARCH_INSN_SIZE; + *next_addr = regs->regs[rj] + sign_extend64(si << 2, 17); + return 0; + } + + return 0; +} + +static int do_single_step(struct pt_regs *regs) +{ + int error = 0; + unsigned long addr = 0; /* Determine where the target instruction will send us to */ + + error = get_step_address(regs, &addr); + if (error) + return error; + + /* Store the opcode in the stepped address */ + error = get_kernel_nofault(stepped_opcode, (void *)addr); + if (error) + return error; + + stepped_address = addr; + + /* Replace the opcode with the break instruction */ + error = copy_to_kernel_nofault((void *)stepped_address, + arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); + flush_icache_range(addr, addr + BREAK_INSTR_SIZE); + + if (error) { + stepped_opcode = 0; + stepped_address = 0; + } else { + kgdb_single_step = 1; + atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); + } + + return error; +} + +/* Undo a single step */ +static void undo_single_step(struct pt_regs *regs) +{ + if (stepped_opcode) { + copy_to_kernel_nofault((void *)stepped_address, + (void *)&stepped_opcode, BREAK_INSTR_SIZE); + flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE); + } + + stepped_opcode = 0; + stepped_address = 0; + kgdb_single_step = 0; + atomic_set(&kgdb_cpu_doing_single_step, -1); +} + +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *regs) +{ + int ret = 0; + + undo_single_step(regs); + regs->csr_prmd |= CSR_PRMD_PWE; + + switch (remcom_in_buffer[0]) { + case 'D': + case 'k': + regs->csr_prmd &= ~CSR_PRMD_PWE; + fallthrough; + case 'c': + kgdb_arch_update_addr(regs, remcom_in_buffer); + break; + case 's': + kgdb_arch_update_addr(regs, remcom_in_buffer); + ret = do_single_step(regs); + break; + default: + ret = -1; + } + + return ret; +} + +static struct hw_breakpoint { + unsigned int enabled; + unsigned long addr; + int len; + int type; + struct perf_event * __percpu *pev; +} breakinfo[LOONGARCH_MAX_BRP]; + +static int hw_break_reserve_slot(int breakno) +{ + int cpu, cnt = 0; + struct perf_event **pevent; + + for_each_online_cpu(cpu) { + cnt++; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_reserve_bp_slot(*pevent)) + goto fail; + } + + return 0; + +fail: + for_each_online_cpu(cpu) { + cnt--; + if (!cnt) + break; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + dbg_release_bp_slot(*pevent); + } + + return -1; +} + +static int hw_break_release_slot(int breakno) +{ + int cpu; + struct perf_event **pevent; + + if (dbg_is_early) + return 0; + + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_release_bp_slot(*pevent)) + /* + * The debugger is responsible for handing the retry on + * remove failure. + */ + return -1; + } + + return 0; +} + +static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) +{ + int i; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) + if (!breakinfo[i].enabled) + break; + + if (i == LOONGARCH_MAX_BRP) + return -1; + + switch (bptype) { + case BP_HARDWARE_BREAKPOINT: + breakinfo[i].type = HW_BREAKPOINT_X; + break; + case BP_READ_WATCHPOINT: + breakinfo[i].type = HW_BREAKPOINT_R; + break; + case BP_WRITE_WATCHPOINT: + breakinfo[i].type = HW_BREAKPOINT_W; + break; + case BP_ACCESS_WATCHPOINT: + breakinfo[i].type = HW_BREAKPOINT_RW; + break; + default: + return -1; + } + + switch (len) { + case 1: + breakinfo[i].len = HW_BREAKPOINT_LEN_1; + break; + case 2: + breakinfo[i].len = HW_BREAKPOINT_LEN_2; + break; + case 4: + breakinfo[i].len = HW_BREAKPOINT_LEN_4; + break; + case 8: + breakinfo[i].len = HW_BREAKPOINT_LEN_8; + break; + default: + return -1; + } + + breakinfo[i].addr = addr; + if (hw_break_reserve_slot(i)) { + breakinfo[i].addr = 0; + return -1; + } + breakinfo[i].enabled = 1; + + return 0; +} + +static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) +{ + int i; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) + if (breakinfo[i].addr == addr && breakinfo[i].enabled) + break; + + if (i == LOONGARCH_MAX_BRP) + return -1; + + if (hw_break_release_slot(i)) { + pr_err("Cannot remove hw breakpoint at %lx\n", addr); + return -1; + } + breakinfo[i].enabled = 0; + + return 0; +} + +static void kgdb_disable_hw_break(struct pt_regs *regs) +{ + int i; + int cpu = raw_smp_processor_id(); + struct perf_event *bp; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { + if (!breakinfo[i].enabled) + continue; + + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (bp->attr.disabled == 1) + continue; + + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; + } + + /* Disable hardware debugging while we are in kgdb */ + csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); +} + +static void kgdb_remove_all_hw_break(void) +{ + int i; + int cpu = raw_smp_processor_id(); + struct perf_event *bp; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { + if (!breakinfo[i].enabled) + continue; + + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (!bp->attr.disabled) { + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; + continue; + } + + if (hw_break_release_slot(i)) + pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr); + breakinfo[i].enabled = 0; + } + + csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); + kgdb_watch_activated = 0; +} + +static void kgdb_correct_hw_break(void) +{ + int i, activated = 0; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { + struct perf_event *bp; + int val; + int cpu = raw_smp_processor_id(); + + if (!breakinfo[i].enabled) + continue; + + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (bp->attr.disabled != 1) + continue; + + bp->attr.bp_addr = breakinfo[i].addr; + bp->attr.bp_len = breakinfo[i].len; + bp->attr.bp_type = breakinfo[i].type; + + val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp)); + if (val) + return; + + val = arch_install_hw_breakpoint(bp); + if (!val) + bp->attr.disabled = 0; + activated = 1; + } + + csr_xchg32(activated ? CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); + kgdb_watch_activated = activated; +} + +const struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */ + .flags = KGDB_HW_BREAKPOINT, + .set_hw_breakpoint = kgdb_set_hw_break, + .remove_hw_breakpoint = kgdb_remove_hw_break, + .disable_hw_break = kgdb_disable_hw_break, + .remove_all_hw_break = kgdb_remove_all_hw_break, + .correct_hw_break = kgdb_correct_hw_break, +}; + +int kgdb_arch_init(void) +{ + return register_die_notifier(&kgdb_notifier); +} + +void kgdb_arch_late(void) +{ + int i, cpu; + struct perf_event_attr attr; + struct perf_event **pevent; + + hw_breakpoint_init(&attr); + + attr.bp_addr = (unsigned long)kgdb_arch_init; + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { + if (breakinfo[i].pev) + continue; + + breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL); + if (IS_ERR((void * __force)breakinfo[i].pev)) { + pr_err("kgdb: Could not allocate hw breakpoints.\n"); + breakinfo[i].pev = NULL; + return; + } + + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[i].pev, cpu); + if (pevent[0]->destroy) { + pevent[0]->destroy = NULL; + release_bp_slot(*pevent); + } + } + } +} + +void kgdb_arch_exit(void) +{ + int i; + + for (i = 0; i < LOONGARCH_MAX_BRP; i++) { + if (breakinfo[i].pev) { + unregister_wide_hw_breakpoint(breakinfo[i].pev); + breakinfo[i].pev = NULL; + } + } + + unregister_die_notifier(&kgdb_notifier); +} diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S new file mode 100644 index 000000000000..9c75120a26d8 --- /dev/null +++ b/arch/loongarch/kernel/lbt.S @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Qi Hu + * Huacai Chen + * + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include + +#define SCR_REG_WIDTH 8 + + .macro EX insn, reg, src, offs +.ex\@: \insn \reg, \src, \offs + _asm_extable .ex\@, .L_lbt_fault + .endm + +/* + * Save a thread's lbt context. + */ +SYM_FUNC_START(_save_lbt) + movscr2gr t1, $scr0 # save scr + stptr.d t1, a0, THREAD_SCR0 + movscr2gr t1, $scr1 + stptr.d t1, a0, THREAD_SCR1 + movscr2gr t1, $scr2 + stptr.d t1, a0, THREAD_SCR2 + movscr2gr t1, $scr3 + stptr.d t1, a0, THREAD_SCR3 + + x86mfflag t1, 0x3f # save eflags + stptr.d t1, a0, THREAD_EFLAGS + jr ra +SYM_FUNC_END(_save_lbt) +EXPORT_SYMBOL(_save_lbt) + +/* + * Restore a thread's lbt context. + */ +SYM_FUNC_START(_restore_lbt) + ldptr.d t1, a0, THREAD_SCR0 # restore scr + movgr2scr $scr0, t1 + ldptr.d t1, a0, THREAD_SCR1 + movgr2scr $scr1, t1 + ldptr.d t1, a0, THREAD_SCR2 + movgr2scr $scr2, t1 + ldptr.d t1, a0, THREAD_SCR3 + movgr2scr $scr3, t1 + + ldptr.d t1, a0, THREAD_EFLAGS # restore eflags + x86mtflag t1, 0x3f + jr ra +SYM_FUNC_END(_restore_lbt) +EXPORT_SYMBOL(_restore_lbt) + +/* + * Load scr/eflag with zero. + */ +SYM_FUNC_START(_init_lbt) + movgr2scr $scr0, zero + movgr2scr $scr1, zero + movgr2scr $scr2, zero + movgr2scr $scr3, zero + + x86mtflag zero, 0x3f + jr ra +SYM_FUNC_END(_init_lbt) + +/* + * a0: scr + * a1: eflag + */ +SYM_FUNC_START(_save_lbt_context) + movscr2gr t1, $scr0 # save scr + EX st.d t1, a0, (0 * SCR_REG_WIDTH) + movscr2gr t1, $scr1 + EX st.d t1, a0, (1 * SCR_REG_WIDTH) + movscr2gr t1, $scr2 + EX st.d t1, a0, (2 * SCR_REG_WIDTH) + movscr2gr t1, $scr3 + EX st.d t1, a0, (3 * SCR_REG_WIDTH) + + x86mfflag t1, 0x3f # save eflags + EX st.w t1, a1, 0 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_save_lbt_context) + +/* + * a0: scr + * a1: eflag + */ +SYM_FUNC_START(_restore_lbt_context) + EX ld.d t1, a0, (0 * SCR_REG_WIDTH) # restore scr + movgr2scr $scr0, t1 + EX ld.d t1, a0, (1 * SCR_REG_WIDTH) + movgr2scr $scr1, t1 + EX ld.d t1, a0, (2 * SCR_REG_WIDTH) + movgr2scr $scr2, t1 + EX ld.d t1, a0, (3 * SCR_REG_WIDTH) + movgr2scr $scr3, t1 + + EX ld.w t1, a1, 0 # restore eflags + x86mtflag t1, 0x3f + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_restore_lbt_context) + +/* + * a0: ftop + */ +SYM_FUNC_START(_save_ftop_context) + x86mftop t1 + st.w t1, a0, 0 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_save_ftop_context) + +/* + * a0: ftop + */ +SYM_FUNC_START(_restore_ftop_context) + ld.w t1, a0, 0 + andi t1, t1, 0x7 + la.pcrel a0, 1f + alsl.d a0, t1, a0, 3 + jr a0 +1: + x86mttop 0 + b 2f + x86mttop 1 + b 2f + x86mttop 2 + b 2f + x86mttop 3 + b 2f + x86mttop 4 + b 2f + x86mttop 5 + b 2f + x86mttop 6 + b 2f + x86mttop 7 +2: + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_restore_ftop_context) + +.L_lbt_fault: + li.w a0, -EFAULT # failure + jr ra diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index 708665895b47..c7d33c489e04 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -67,39 +67,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) void __init pcpu_populate_pte(unsigned long addr) { - pgd_t *pgd = pgd_offset_k(addr); - p4d_t *p4d = p4d_offset(pgd, addr); - pud_t *pud; - pmd_t *pmd; - - if (p4d_none(*p4d)) { - pud_t *new; - - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - pgd_populate(&init_mm, pgd, new); -#ifndef __PAGETABLE_PUD_FOLDED - pud_init(new); -#endif - } - - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd_t *new; - - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - pud_populate(&init_mm, pud, new); -#ifndef __PAGETABLE_PMD_FOLDED - pmd_init(new); -#endif - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - pte_t *new; - - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - pmd_populate_kernel(&init_mm, pmd, new); - } + populate_kernel_pte(addr); } void __init setup_per_cpu_areas(void) @@ -470,7 +438,6 @@ void __init mem_init(void) { high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); memblock_free_all(); - setup_zero_pages(); /* This comes from node 0 */ } int pcibus_to_node(struct pci_bus *bus) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index ba457e43f5be..3cb082e0c992 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -82,9 +83,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) euen = regs->csr_euen & ~(CSR_EUEN_FPEN); regs->csr_euen = euen; lose_fpu(0); + lose_lbt(0); clear_thread_flag(TIF_LSX_CTX_LIVE); clear_thread_flag(TIF_LASX_CTX_LIVE); + clear_thread_flag(TIF_LBT_CTX_LIVE); clear_used_math(); regs->csr_era = pc; regs->regs[3] = sp; @@ -121,10 +124,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) preempt_enable(); - if (used_math()) - memcpy(dst, src, sizeof(struct task_struct)); - else + if (!used_math()) memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr)); + else + memcpy(dst, src, offsetof(struct task_struct, thread.lbt.scr0)); + +#ifdef CONFIG_CPU_HAS_LBT + memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt)); +#endif return 0; } @@ -189,8 +196,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) ptrace_hw_copy_thread(p); clear_tsk_thread_flag(p, TIF_USEDFPU); clear_tsk_thread_flag(p, TIF_USEDSIMD); + clear_tsk_thread_flag(p, TIF_USEDLBT); clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); + clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE); return 0; } diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index f72adbf530c6..c114c5ef1332 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -338,6 +339,46 @@ static int simd_set(struct task_struct *target, #endif /* CONFIG_CPU_HAS_LSX */ +#ifdef CONFIG_CPU_HAS_LBT +static int lbt_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int r; + + r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0)); + r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1)); + r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2)); + r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3)); + r = membuf_write(&to, &target->thread.lbt.eflags, sizeof(u32)); + r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32)); + + return r; +} + +static int lbt_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int err = 0; + const int eflags_start = 4 * sizeof(target->thread.lbt.scr0); + const int ftop_start = eflags_start + sizeof(u32); + + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.lbt.scr0, + 0, 4 * sizeof(target->thread.lbt.scr0)); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.lbt.eflags, + eflags_start, ftop_start); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.ftop, + ftop_start, ftop_start + sizeof(u32)); + + return err; +} +#endif /* CONFIG_CPU_HAS_LBT */ + #ifdef CONFIG_HAVE_HW_BREAKPOINT /* @@ -802,6 +843,9 @@ enum loongarch_regset { #ifdef CONFIG_CPU_HAS_LASX REGSET_LASX, #endif +#ifdef CONFIG_CPU_HAS_LBT + REGSET_LBT, +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT REGSET_HW_BREAK, REGSET_HW_WATCH, @@ -853,6 +897,16 @@ static const struct user_regset loongarch64_regsets[] = { .set = simd_set, }, #endif +#ifdef CONFIG_CPU_HAS_LBT + [REGSET_LBT] = { + .core_note_type = NT_LOONGARCH_LBT, + .n = 5, + .size = sizeof(u64), + .align = sizeof(u64), + .regset_get = lbt_get, + .set = lbt_set, + }, +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT [REGSET_HW_BREAK] = { .core_note_type = NT_LOONGARCH_HW_BREAK, diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 01f94d1e3edf..6c3eff9af9fb 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -157,12 +157,11 @@ static inline void __init update_reloc_offset(unsigned long *addr, long random_o *new_addr = (unsigned long)reloc_offset; } -void * __init relocate_kernel(void) +unsigned long __init relocate_kernel(void) { unsigned long kernel_length; unsigned long random_offset = 0; void *location_new = _text; /* Default to original kernel start */ - void *kernel_entry = start_kernel; /* Default to original kernel entry point */ char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */ strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE); @@ -190,9 +189,6 @@ void * __init relocate_kernel(void) reloc_offset += random_offset; - /* Return the new kernel's entry point */ - kernel_entry = RELOCATED_KASLR(start_kernel); - /* The current thread is now within the relocated kernel */ __current_thread_info = RELOCATED_KASLR(__current_thread_info); @@ -204,7 +200,7 @@ void * __init relocate_kernel(void) relocate_absolute(random_offset); - return kernel_entry; + return random_offset; } /* diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 9d830ab4e302..7783f0a3d742 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -626,4 +626,8 @@ void __init setup_arch(char **cmdline_p) #endif paging_init(); + +#ifdef CONFIG_KASAN + kasan_init(); +#endif } diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index ceb899366c0a..504fdfe85203 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -44,6 +45,9 @@ /* Make sure we will not lose FPU ownership */ #define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); }) #define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); }) +/* Make sure we will not lose LBT ownership */ +#define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) +#define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) /* Assembly functions to move context to/from the FPU */ extern asmlinkage int @@ -59,6 +63,13 @@ _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); extern asmlinkage int _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +#ifdef CONFIG_CPU_HAS_LBT +extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); +extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); +extern asmlinkage int _save_ftop_context(void __user *ftop); +extern asmlinkage int _restore_ftop_context(void __user *ftop); +#endif + struct rt_sigframe { struct siginfo rs_info; struct ucontext rs_uctx; @@ -75,6 +86,7 @@ struct extctx_layout { struct _ctx_layout fpu; struct _ctx_layout lsx; struct _ctx_layout lasx; + struct _ctx_layout lbt; struct _ctx_layout end; }; @@ -215,6 +227,52 @@ static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx) return err; } +#ifdef CONFIG_CPU_HAS_LBT +static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx) +{ + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; + + err |= __put_user(current->thread.lbt.scr0, ®s[0]); + err |= __put_user(current->thread.lbt.scr1, ®s[1]); + err |= __put_user(current->thread.lbt.scr2, ®s[2]); + err |= __put_user(current->thread.lbt.scr3, ®s[3]); + err |= __put_user(current->thread.lbt.eflags, eflags); + + return err; +} + +static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx) +{ + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; + + err |= __get_user(current->thread.lbt.scr0, ®s[0]); + err |= __get_user(current->thread.lbt.scr1, ®s[1]); + err |= __get_user(current->thread.lbt.scr2, ®s[2]); + err |= __get_user(current->thread.lbt.scr3, ®s[3]); + err |= __get_user(current->thread.lbt.eflags, eflags); + + return err; +} + +static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx) +{ + uint32_t __user *ftop = &ctx->ftop; + + return __put_user(current->thread.fpu.ftop, ftop); +} + +static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx) +{ + uint32_t __user *ftop = &ctx->ftop; + + return __get_user(current->thread.fpu.ftop, ftop); +} +#endif + /* * Wrappers for the assembly _{save,restore}_fp_context functions. */ @@ -272,6 +330,41 @@ static int restore_hw_lasx_context(struct lasx_context __user *ctx) return _restore_lasx_context(regs, fcc, fcsr); } +/* + * Wrappers for the assembly _{save,restore}_lbt_context functions. + */ +#ifdef CONFIG_CPU_HAS_LBT +static int save_hw_lbt_context(struct lbt_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; + + return _save_lbt_context(regs, eflags); +} + +static int restore_hw_lbt_context(struct lbt_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint32_t __user *eflags = (uint32_t *)&ctx->eflags; + + return _restore_lbt_context(regs, eflags); +} + +static int save_hw_ftop_context(struct lbt_context __user *ctx) +{ + uint32_t __user *ftop = &ctx->ftop; + + return _save_ftop_context(ftop); +} + +static int restore_hw_ftop_context(struct lbt_context __user *ctx) +{ + uint32_t __user *ftop = &ctx->ftop; + + return _restore_ftop_context(ftop); +} +#endif + static int fcsr_pending(unsigned int __user *fcsr) { int err, sig = 0; @@ -519,6 +612,77 @@ static int protected_restore_lasx_context(struct extctx_layout *extctx) return err ?: sig; } +#ifdef CONFIG_CPU_HAS_LBT +static int protected_save_lbt_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->lbt.addr; + struct lbt_context __user *lbt_ctx = + (struct lbt_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs; + uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags; + + while (1) { + lock_lbt_owner(); + if (is_lbt_owner()) + err |= save_hw_lbt_context(lbt_ctx); + else + err |= copy_lbt_to_sigcontext(lbt_ctx); + if (is_fpu_owner()) + err |= save_hw_ftop_context(lbt_ctx); + else + err |= copy_ftop_to_sigcontext(lbt_ctx); + unlock_lbt_owner(); + + err |= __put_user(LBT_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->lbt.size, &info->size); + + if (likely(!err)) + break; + /* Touch the LBT context and try again */ + err = __put_user(0, ®s[0]) | __put_user(0, eflags); + + if (err) + return err; + } + + return err; +} + +static int protected_restore_lbt_context(struct extctx_layout *extctx) +{ + int err = 0, tmp __maybe_unused; + struct sctx_info __user *info = extctx->lbt.addr; + struct lbt_context __user *lbt_ctx = + (struct lbt_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs; + uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags; + + while (1) { + lock_lbt_owner(); + if (is_lbt_owner()) + err |= restore_hw_lbt_context(lbt_ctx); + else + err |= copy_lbt_from_sigcontext(lbt_ctx); + if (is_fpu_owner()) + err |= restore_hw_ftop_context(lbt_ctx); + else + err |= copy_ftop_from_sigcontext(lbt_ctx); + unlock_lbt_owner(); + + if (likely(!err)) + break; + /* Touch the LBT context and try again */ + err = __get_user(tmp, ®s[0]) | __get_user(tmp, eflags); + + if (err) + return err; + } + + return err; +} +#endif + static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, struct extctx_layout *extctx) { @@ -539,6 +703,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, else if (extctx->fpu.addr) err |= protected_save_fpu_context(extctx); +#ifdef CONFIG_CPU_HAS_LBT + if (extctx->lbt.addr) + err |= protected_save_lbt_context(extctx); +#endif + /* Set the "end" magic */ info = (struct sctx_info *)extctx->end.addr; err |= __put_user(0, &info->magic); @@ -584,6 +753,13 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout * extctx->lasx.addr = info; break; + case LBT_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct lbt_context))) + goto invalid; + extctx->lbt.addr = info; + break; + default: goto invalid; } @@ -636,6 +812,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc else if (extctx.fpu.addr) err |= protected_restore_fpu_context(&extctx); +#ifdef CONFIG_CPU_HAS_LBT + if (extctx.lbt.addr) + err |= protected_restore_lbt_context(&extctx); +#endif + bad: return err; } @@ -700,6 +881,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp); } +#ifdef CONFIG_CPU_HAS_LBT + if (cpu_has_lbt && thread_lbt_context_live()) { + new_sp = extframe_alloc(extctx, &extctx->lbt, + sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp); + } +#endif + return new_sp; } diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index 2463d2fea21f..92270f14db94 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -18,17 +18,19 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct pt_regs dummyregs; struct unwind_state state; - regs = &dummyregs; + if (!regs) { + regs = &dummyregs; - if (task == current) { - regs->regs[3] = (unsigned long)__builtin_frame_address(0); - regs->csr_era = (unsigned long)__builtin_return_address(0); - } else { - regs->regs[3] = thread_saved_fp(task); - regs->csr_era = thread_saved_ra(task); + if (task == current) { + regs->regs[3] = (unsigned long)__builtin_frame_address(0); + regs->csr_era = (unsigned long)__builtin_return_address(0); + } else { + regs->regs[3] = thread_saved_fp(task); + regs->csr_era = thread_saved_ra(task); + } + regs->regs[1] = 0; } - regs->regs[1] = 0; for (unwind_start(&state, task, regs); !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 89699db45cec..65214774ef7c 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -36,7 +36,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -702,6 +704,11 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) * pertain to them. */ switch (bcode) { + case BRK_KDB: + if (kgdb_breakpoint_handler(regs)) + goto out; + else + break; case BRK_KPROBE_BP: if (kprobe_breakpoint_handler(regs)) goto out; @@ -768,6 +775,9 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs) #ifndef CONFIG_HAVE_HW_BREAKPOINT pr_warn("Hardware watch point handler not implemented!\n"); #else + if (kgdb_breakpoint_handler(regs)) + goto out; + if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) { int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1); unsigned long pc = instruction_pointer(regs); @@ -966,13 +976,47 @@ asmlinkage void noinstr do_lasx(struct pt_regs *regs) irqentry_exit(regs, state); } +static void init_restore_lbt(void) +{ + if (!thread_lbt_context_live()) { + /* First time LBT context user */ + init_lbt(); + set_thread_flag(TIF_LBT_CTX_LIVE); + } else { + if (!is_lbt_owner()) + own_lbt_inatomic(1); + } + + BUG_ON(!is_lbt_enabled()); +} + asmlinkage void noinstr do_lbt(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); - local_irq_enable(); - force_sig(SIGILL); - local_irq_disable(); + /* + * BTD (Binary Translation Disable exception) can be triggered + * during FP save/restore if TM (Top Mode) is on, which may + * cause irq_enable during 'switch_to'. To avoid this situation + * (including the user using 'MOVGR2GCSR' to turn on TM, which + * will not trigger the BTE), we need to check PRMD first. + */ + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_enable(); + + if (!cpu_has_lbt) { + force_sig(SIGILL); + goto out; + } + BUG_ON(is_lbt_enabled()); + + preempt_disable(); + init_restore_lbt(); + preempt_enable(); + +out: + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_disable(); irqentry_exit(regs, state); } diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index d60d4e096cfa..a77bf160bfc4 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -6,4 +6,6 @@ lib-y += delay.o memset.o memcpy.o memmove.o \ clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o +obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o + obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 0790eadce166..be741544e62b 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -11,19 +11,6 @@ #include #include -.irp to, 0, 1, 2, 3, 4, 5, 6, 7 -.L_fixup_handle_\to\(): - sub.d a0, a2, a0 - addi.d a0, a0, (\to) * (-8) - jr ra -.endr - -.irp to, 0, 2, 4 -.L_fixup_handle_s\to\(): - addi.d a0, a1, -\to - jr ra -.endr - SYM_FUNC_START(__clear_user) /* * Some CPUs support hardware unaligned access @@ -51,7 +38,7 @@ SYM_FUNC_START(__clear_user_generic) 2: move a0, a1 jr ra - _asm_extable 1b, .L_fixup_handle_s0 + _asm_extable 1b, 2b SYM_FUNC_END(__clear_user_generic) /* @@ -173,33 +160,47 @@ SYM_FUNC_START(__clear_user_fast) jr ra /* fixup and ex_table */ - _asm_extable 0b, .L_fixup_handle_0 - _asm_extable 1b, .L_fixup_handle_0 - _asm_extable 2b, .L_fixup_handle_1 - _asm_extable 3b, .L_fixup_handle_2 - _asm_extable 4b, .L_fixup_handle_3 - _asm_extable 5b, .L_fixup_handle_4 - _asm_extable 6b, .L_fixup_handle_5 - _asm_extable 7b, .L_fixup_handle_6 - _asm_extable 8b, .L_fixup_handle_7 - _asm_extable 9b, .L_fixup_handle_0 - _asm_extable 10b, .L_fixup_handle_1 - _asm_extable 11b, .L_fixup_handle_2 - _asm_extable 12b, .L_fixup_handle_3 - _asm_extable 13b, .L_fixup_handle_0 - _asm_extable 14b, .L_fixup_handle_1 - _asm_extable 15b, .L_fixup_handle_0 - _asm_extable 16b, .L_fixup_handle_0 - _asm_extable 17b, .L_fixup_handle_s0 - _asm_extable 18b, .L_fixup_handle_s0 - _asm_extable 19b, .L_fixup_handle_s0 - _asm_extable 20b, .L_fixup_handle_s2 - _asm_extable 21b, .L_fixup_handle_s0 - _asm_extable 22b, .L_fixup_handle_s0 - _asm_extable 23b, .L_fixup_handle_s4 - _asm_extable 24b, .L_fixup_handle_s0 - _asm_extable 25b, .L_fixup_handle_s4 - _asm_extable 26b, .L_fixup_handle_s0 - _asm_extable 27b, .L_fixup_handle_s4 - _asm_extable 28b, .L_fixup_handle_s0 +.Llarge_fixup: + sub.d a1, a2, a0 + +.Lsmall_fixup: +29: st.b zero, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, -1 + bgt a1, zero, 29b + +.Lexit: + move a0, a1 + jr ra + + _asm_extable 0b, .Lsmall_fixup + _asm_extable 1b, .Llarge_fixup + _asm_extable 2b, .Llarge_fixup + _asm_extable 3b, .Llarge_fixup + _asm_extable 4b, .Llarge_fixup + _asm_extable 5b, .Llarge_fixup + _asm_extable 6b, .Llarge_fixup + _asm_extable 7b, .Llarge_fixup + _asm_extable 8b, .Llarge_fixup + _asm_extable 9b, .Llarge_fixup + _asm_extable 10b, .Llarge_fixup + _asm_extable 11b, .Llarge_fixup + _asm_extable 12b, .Llarge_fixup + _asm_extable 13b, .Llarge_fixup + _asm_extable 14b, .Llarge_fixup + _asm_extable 15b, .Llarge_fixup + _asm_extable 16b, .Llarge_fixup + _asm_extable 17b, .Lexit + _asm_extable 18b, .Lsmall_fixup + _asm_extable 19b, .Lsmall_fixup + _asm_extable 20b, .Lsmall_fixup + _asm_extable 21b, .Lsmall_fixup + _asm_extable 22b, .Lsmall_fixup + _asm_extable 23b, .Lsmall_fixup + _asm_extable 24b, .Lsmall_fixup + _asm_extable 25b, .Lsmall_fixup + _asm_extable 26b, .Lsmall_fixup + _asm_extable 27b, .Lsmall_fixup + _asm_extable 28b, .Lsmall_fixup + _asm_extable 29b, .Lexit SYM_FUNC_END(__clear_user_fast) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index bfe3d2793d00..feec3d362803 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -11,19 +11,6 @@ #include #include -.irp to, 0, 1, 2, 3, 4, 5, 6, 7 -.L_fixup_handle_\to\(): - sub.d a0, a2, a0 - addi.d a0, a0, (\to) * (-8) - jr ra -.endr - -.irp to, 0, 2, 4 -.L_fixup_handle_s\to\(): - addi.d a0, a2, -\to - jr ra -.endr - SYM_FUNC_START(__copy_user) /* * Some CPUs support hardware unaligned access @@ -54,8 +41,8 @@ SYM_FUNC_START(__copy_user_generic) 3: move a0, a2 jr ra - _asm_extable 1b, .L_fixup_handle_s0 - _asm_extable 2b, .L_fixup_handle_s0 + _asm_extable 1b, 3b + _asm_extable 2b, 3b SYM_FUNC_END(__copy_user_generic) /* @@ -69,10 +56,10 @@ SYM_FUNC_START(__copy_user_fast) sltui t0, a2, 9 bnez t0, .Lsmall - add.d a3, a1, a2 - add.d a2, a0, a2 0: ld.d t0, a1, 0 1: st.d t0, a0, 0 + add.d a3, a1, a2 + add.d a2, a0, a2 /* align up destination address */ andi t1, a0, 7 @@ -94,7 +81,6 @@ SYM_FUNC_START(__copy_user_fast) 7: ld.d t5, a1, 40 8: ld.d t6, a1, 48 9: ld.d t7, a1, 56 - addi.d a1, a1, 64 10: st.d t0, a0, 0 11: st.d t1, a0, 8 12: st.d t2, a0, 16 @@ -103,6 +89,7 @@ SYM_FUNC_START(__copy_user_fast) 15: st.d t5, a0, 40 16: st.d t6, a0, 48 17: st.d t7, a0, 56 + addi.d a1, a1, 64 addi.d a0, a0, 64 bltu a1, a4, .Lloop64 @@ -114,11 +101,11 @@ SYM_FUNC_START(__copy_user_fast) 19: ld.d t1, a1, 8 20: ld.d t2, a1, 16 21: ld.d t3, a1, 24 - addi.d a1, a1, 32 22: st.d t0, a0, 0 23: st.d t1, a0, 8 24: st.d t2, a0, 16 25: st.d t3, a0, 24 + addi.d a1, a1, 32 addi.d a0, a0, 32 .Llt32: @@ -126,9 +113,9 @@ SYM_FUNC_START(__copy_user_fast) bgeu a1, a4, .Llt16 26: ld.d t0, a1, 0 27: ld.d t1, a1, 8 - addi.d a1, a1, 16 28: st.d t0, a0, 0 29: st.d t1, a0, 8 + addi.d a1, a1, 16 addi.d a0, a0, 16 .Llt16: @@ -136,6 +123,7 @@ SYM_FUNC_START(__copy_user_fast) bgeu a1, a4, .Llt8 30: ld.d t0, a1, 0 31: st.d t0, a0, 0 + addi.d a1, a1, 8 addi.d a0, a0, 8 .Llt8: @@ -214,62 +202,79 @@ SYM_FUNC_START(__copy_user_fast) jr ra /* fixup and ex_table */ - _asm_extable 0b, .L_fixup_handle_0 - _asm_extable 1b, .L_fixup_handle_0 - _asm_extable 2b, .L_fixup_handle_0 - _asm_extable 3b, .L_fixup_handle_0 - _asm_extable 4b, .L_fixup_handle_0 - _asm_extable 5b, .L_fixup_handle_0 - _asm_extable 6b, .L_fixup_handle_0 - _asm_extable 7b, .L_fixup_handle_0 - _asm_extable 8b, .L_fixup_handle_0 - _asm_extable 9b, .L_fixup_handle_0 - _asm_extable 10b, .L_fixup_handle_0 - _asm_extable 11b, .L_fixup_handle_1 - _asm_extable 12b, .L_fixup_handle_2 - _asm_extable 13b, .L_fixup_handle_3 - _asm_extable 14b, .L_fixup_handle_4 - _asm_extable 15b, .L_fixup_handle_5 - _asm_extable 16b, .L_fixup_handle_6 - _asm_extable 17b, .L_fixup_handle_7 - _asm_extable 18b, .L_fixup_handle_0 - _asm_extable 19b, .L_fixup_handle_0 - _asm_extable 20b, .L_fixup_handle_0 - _asm_extable 21b, .L_fixup_handle_0 - _asm_extable 22b, .L_fixup_handle_0 - _asm_extable 23b, .L_fixup_handle_1 - _asm_extable 24b, .L_fixup_handle_2 - _asm_extable 25b, .L_fixup_handle_3 - _asm_extable 26b, .L_fixup_handle_0 - _asm_extable 27b, .L_fixup_handle_0 - _asm_extable 28b, .L_fixup_handle_0 - _asm_extable 29b, .L_fixup_handle_1 - _asm_extable 30b, .L_fixup_handle_0 - _asm_extable 31b, .L_fixup_handle_0 - _asm_extable 32b, .L_fixup_handle_0 - _asm_extable 33b, .L_fixup_handle_0 - _asm_extable 34b, .L_fixup_handle_s0 - _asm_extable 35b, .L_fixup_handle_s0 - _asm_extable 36b, .L_fixup_handle_s0 - _asm_extable 37b, .L_fixup_handle_s0 - _asm_extable 38b, .L_fixup_handle_s0 - _asm_extable 39b, .L_fixup_handle_s0 - _asm_extable 40b, .L_fixup_handle_s0 - _asm_extable 41b, .L_fixup_handle_s2 - _asm_extable 42b, .L_fixup_handle_s0 - _asm_extable 43b, .L_fixup_handle_s0 - _asm_extable 44b, .L_fixup_handle_s0 - _asm_extable 45b, .L_fixup_handle_s0 - _asm_extable 46b, .L_fixup_handle_s0 - _asm_extable 47b, .L_fixup_handle_s4 - _asm_extable 48b, .L_fixup_handle_s0 - _asm_extable 49b, .L_fixup_handle_s0 - _asm_extable 50b, .L_fixup_handle_s0 - _asm_extable 51b, .L_fixup_handle_s4 - _asm_extable 52b, .L_fixup_handle_s0 - _asm_extable 53b, .L_fixup_handle_s0 - _asm_extable 54b, .L_fixup_handle_s0 - _asm_extable 55b, .L_fixup_handle_s4 - _asm_extable 56b, .L_fixup_handle_s0 - _asm_extable 57b, .L_fixup_handle_s0 +.Llarge_fixup: + sub.d a2, a2, a0 + +.Lsmall_fixup: +58: ld.b t0, a1, 0 +59: st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 58b + +.Lexit: + move a0, a2 + jr ra + + _asm_extable 0b, .Lsmall_fixup + _asm_extable 1b, .Lsmall_fixup + _asm_extable 2b, .Llarge_fixup + _asm_extable 3b, .Llarge_fixup + _asm_extable 4b, .Llarge_fixup + _asm_extable 5b, .Llarge_fixup + _asm_extable 6b, .Llarge_fixup + _asm_extable 7b, .Llarge_fixup + _asm_extable 8b, .Llarge_fixup + _asm_extable 9b, .Llarge_fixup + _asm_extable 10b, .Llarge_fixup + _asm_extable 11b, .Llarge_fixup + _asm_extable 12b, .Llarge_fixup + _asm_extable 13b, .Llarge_fixup + _asm_extable 14b, .Llarge_fixup + _asm_extable 15b, .Llarge_fixup + _asm_extable 16b, .Llarge_fixup + _asm_extable 17b, .Llarge_fixup + _asm_extable 18b, .Llarge_fixup + _asm_extable 19b, .Llarge_fixup + _asm_extable 20b, .Llarge_fixup + _asm_extable 21b, .Llarge_fixup + _asm_extable 22b, .Llarge_fixup + _asm_extable 23b, .Llarge_fixup + _asm_extable 24b, .Llarge_fixup + _asm_extable 25b, .Llarge_fixup + _asm_extable 26b, .Llarge_fixup + _asm_extable 27b, .Llarge_fixup + _asm_extable 28b, .Llarge_fixup + _asm_extable 29b, .Llarge_fixup + _asm_extable 30b, .Llarge_fixup + _asm_extable 31b, .Llarge_fixup + _asm_extable 32b, .Llarge_fixup + _asm_extable 33b, .Llarge_fixup + _asm_extable 34b, .Lexit + _asm_extable 35b, .Lexit + _asm_extable 36b, .Lsmall_fixup + _asm_extable 37b, .Lsmall_fixup + _asm_extable 38b, .Lsmall_fixup + _asm_extable 39b, .Lsmall_fixup + _asm_extable 40b, .Lsmall_fixup + _asm_extable 41b, .Lsmall_fixup + _asm_extable 42b, .Lsmall_fixup + _asm_extable 43b, .Lsmall_fixup + _asm_extable 44b, .Lsmall_fixup + _asm_extable 45b, .Lsmall_fixup + _asm_extable 46b, .Lsmall_fixup + _asm_extable 47b, .Lsmall_fixup + _asm_extable 48b, .Lsmall_fixup + _asm_extable 49b, .Lsmall_fixup + _asm_extable 50b, .Lsmall_fixup + _asm_extable 51b, .Lsmall_fixup + _asm_extable 52b, .Lsmall_fixup + _asm_extable 53b, .Lsmall_fixup + _asm_extable 54b, .Lsmall_fixup + _asm_extable 55b, .Lsmall_fixup + _asm_extable 56b, .Lsmall_fixup + _asm_extable 57b, .Lsmall_fixup + _asm_extable 58b, .Lexit + _asm_extable 59b, .Lexit SYM_FUNC_END(__copy_user_fast) diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index cc30b3b6252f..fa1148878d2b 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -10,6 +10,8 @@ #include #include +.section .noinstr.text, "ax" + SYM_FUNC_START(memcpy) /* * Some CPUs support hardware unaligned access @@ -17,9 +19,13 @@ SYM_FUNC_START(memcpy) ALTERNATIVE "b __memcpy_generic", \ "b __memcpy_fast", CPU_FEATURE_UAL SYM_FUNC_END(memcpy) -_ASM_NOKPROBE(memcpy) +SYM_FUNC_ALIAS(__memcpy, memcpy) EXPORT_SYMBOL(memcpy) +EXPORT_SYMBOL(__memcpy) + +_ASM_NOKPROBE(memcpy) +_ASM_NOKPROBE(__memcpy) /* * void *__memcpy_generic(void *dst, const void *src, size_t n) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S index 7dc76d1484b6..82dae062fec8 100644 --- a/arch/loongarch/lib/memmove.S +++ b/arch/loongarch/lib/memmove.S @@ -10,23 +10,29 @@ #include #include +.section .noinstr.text, "ax" + SYM_FUNC_START(memmove) - blt a0, a1, memcpy /* dst < src, memcpy */ - blt a1, a0, rmemcpy /* src < dst, rmemcpy */ - jr ra /* dst == src, return */ + blt a0, a1, __memcpy /* dst < src, memcpy */ + blt a1, a0, __rmemcpy /* src < dst, rmemcpy */ + jr ra /* dst == src, return */ SYM_FUNC_END(memmove) -_ASM_NOKPROBE(memmove) +SYM_FUNC_ALIAS(__memmove, memmove) EXPORT_SYMBOL(memmove) +EXPORT_SYMBOL(__memmove) -SYM_FUNC_START(rmemcpy) +_ASM_NOKPROBE(memmove) +_ASM_NOKPROBE(__memmove) + +SYM_FUNC_START(__rmemcpy) /* * Some CPUs support hardware unaligned access */ ALTERNATIVE "b __rmemcpy_generic", \ "b __rmemcpy_fast", CPU_FEATURE_UAL -SYM_FUNC_END(rmemcpy) -_ASM_NOKPROBE(rmemcpy) +SYM_FUNC_END(__rmemcpy) +_ASM_NOKPROBE(__rmemcpy) /* * void *__rmemcpy_generic(void *dst, const void *src, size_t n) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index 3f20f7996e8e..06d3ca54cbfe 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -16,6 +16,8 @@ bstrins.d \r0, \r0, 63, 32 .endm +.section .noinstr.text, "ax" + SYM_FUNC_START(memset) /* * Some CPUs support hardware unaligned access @@ -23,9 +25,13 @@ SYM_FUNC_START(memset) ALTERNATIVE "b __memset_generic", \ "b __memset_fast", CPU_FEATURE_UAL SYM_FUNC_END(memset) -_ASM_NOKPROBE(memset) +SYM_FUNC_ALIAS(__memset, memset) EXPORT_SYMBOL(memset) +EXPORT_SYMBOL(__memset) + +_ASM_NOKPROBE(memset) +_ASM_NOKPROBE(__memset) /* * void *__memset_generic(void *s, int c, size_t n) diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c new file mode 100644 index 000000000000..84cd24b728c4 --- /dev/null +++ b/arch/loongarch/lib/xor_simd.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * LoongArch SIMD XOR operations + * + * Copyright (C) 2023 WANG Xuerui + */ + +#include "xor_simd.h" + +/* + * Process one cache line (64 bytes) per loop. This is assuming all future + * popular LoongArch cores are similar performance-characteristics-wise to the + * current models. + */ +#define LINE_WIDTH 64 + +#ifdef CONFIG_CPU_HAS_LSX + +#define LD(reg, base, offset) \ + "vld $vr" #reg ", %[" #base "], " #offset "\n\t" +#define ST(reg, base, offset) \ + "vst $vr" #reg ", %[" #base "], " #offset "\n\t" +#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t" + +#define LD_INOUT_LINE(base) \ + LD(0, base, 0) \ + LD(1, base, 16) \ + LD(2, base, 32) \ + LD(3, base, 48) + +#define LD_AND_XOR_LINE(base) \ + LD(4, base, 0) \ + LD(5, base, 16) \ + LD(6, base, 32) \ + LD(7, base, 48) \ + XOR(0, 4) \ + XOR(1, 5) \ + XOR(2, 6) \ + XOR(3, 7) + +#define ST_LINE(base) \ + ST(0, base, 0) \ + ST(1, base, 16) \ + ST(2, base, 32) \ + ST(3, base, 48) + +#define XOR_FUNC_NAME(nr) __xor_lsx_##nr +#include "xor_template.c" + +#undef LD +#undef ST +#undef XOR +#undef LD_INOUT_LINE +#undef LD_AND_XOR_LINE +#undef ST_LINE +#undef XOR_FUNC_NAME + +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX + +#define LD(reg, base, offset) \ + "xvld $xr" #reg ", %[" #base "], " #offset "\n\t" +#define ST(reg, base, offset) \ + "xvst $xr" #reg ", %[" #base "], " #offset "\n\t" +#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t" + +#define LD_INOUT_LINE(base) \ + LD(0, base, 0) \ + LD(1, base, 32) + +#define LD_AND_XOR_LINE(base) \ + LD(2, base, 0) \ + LD(3, base, 32) \ + XOR(0, 2) \ + XOR(1, 3) + +#define ST_LINE(base) \ + ST(0, base, 0) \ + ST(1, base, 32) + +#define XOR_FUNC_NAME(nr) __xor_lasx_##nr +#include "xor_template.c" + +#undef LD +#undef ST +#undef XOR +#undef LD_INOUT_LINE +#undef LD_AND_XOR_LINE +#undef ST_LINE +#undef XOR_FUNC_NAME + +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h new file mode 100644 index 000000000000..f50f32514d80 --- /dev/null +++ b/arch/loongarch/lib/xor_simd.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Simple interface to link xor_simd.c and xor_simd_glue.c + * + * Separating these files ensures that no SIMD instructions are run outside of + * the kfpu critical section. + */ + +#ifndef __LOONGARCH_LIB_XOR_SIMD_H +#define __LOONGARCH_LIB_XOR_SIMD_H + +#ifdef CONFIG_CPU_HAS_LSX +void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3); +void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4, const unsigned long * __restrict p5); +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3); +void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, const unsigned long * __restrict p3, + const unsigned long * __restrict p4, const unsigned long * __restrict p5); +#endif /* CONFIG_CPU_HAS_LASX */ + +#endif /* __LOONGARCH_LIB_XOR_SIMD_H */ diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c new file mode 100644 index 000000000000..393f689dbcf6 --- /dev/null +++ b/arch/loongarch/lib/xor_simd_glue.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * LoongArch SIMD XOR operations + * + * Copyright (C) 2023 WANG Xuerui + */ + +#include +#include +#include +#include +#include "xor_simd.h" + +#define MAKE_XOR_GLUE_2(flavor) \ +void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_2(bytes, p1, p2); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_2) + +#define MAKE_XOR_GLUE_3(flavor) \ +void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2, \ + const unsigned long * __restrict p3) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_3(bytes, p1, p2, p3); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_3) + +#define MAKE_XOR_GLUE_4(flavor) \ +void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2, \ + const unsigned long * __restrict p3, \ + const unsigned long * __restrict p4) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_4(bytes, p1, p2, p3, p4); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_4) + +#define MAKE_XOR_GLUE_5(flavor) \ +void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2, \ + const unsigned long * __restrict p3, \ + const unsigned long * __restrict p4, \ + const unsigned long * __restrict p5) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_5) + +#define MAKE_XOR_GLUES(flavor) \ + MAKE_XOR_GLUE_2(flavor); \ + MAKE_XOR_GLUE_3(flavor); \ + MAKE_XOR_GLUE_4(flavor); \ + MAKE_XOR_GLUE_5(flavor) + +#ifdef CONFIG_CPU_HAS_LSX +MAKE_XOR_GLUES(lsx); +#endif + +#ifdef CONFIG_CPU_HAS_LASX +MAKE_XOR_GLUES(lasx); +#endif diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c new file mode 100644 index 000000000000..0358ced7fe33 --- /dev/null +++ b/arch/loongarch/lib/xor_template.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 WANG Xuerui + * + * Template for XOR operations, instantiated in xor_simd.c. + * + * Expected preprocessor definitions: + * + * - LINE_WIDTH + * - XOR_FUNC_NAME(nr) + * - LD_INOUT_LINE(buf) + * - LD_AND_XOR_LINE(buf) + * - ST_LINE(buf) + */ + +void XOR_FUNC_NAME(2)(unsigned long bytes, + unsigned long * __restrict v1, + const unsigned long * __restrict v2) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2) : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} + +void XOR_FUNC_NAME(3)(unsigned long bytes, + unsigned long * __restrict v1, + const unsigned long * __restrict v2, + const unsigned long * __restrict v3) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + LD_AND_XOR_LINE(v3) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + v3 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} + +void XOR_FUNC_NAME(4)(unsigned long bytes, + unsigned long * __restrict v1, + const unsigned long * __restrict v2, + const unsigned long * __restrict v3, + const unsigned long * __restrict v4) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + LD_AND_XOR_LINE(v3) + LD_AND_XOR_LINE(v4) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4) + : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + v3 += LINE_WIDTH / sizeof(unsigned long); + v4 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} + +void XOR_FUNC_NAME(5)(unsigned long bytes, + unsigned long * __restrict v1, + const unsigned long * __restrict v2, + const unsigned long * __restrict v3, + const unsigned long * __restrict v4, + const unsigned long * __restrict v5) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + LD_AND_XOR_LINE(v3) + LD_AND_XOR_LINE(v4) + LD_AND_XOR_LINE(v5) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4), + [v5] "r"(v5) : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + v3 += LINE_WIDTH / sizeof(unsigned long); + v4 += LINE_WIDTH / sizeof(unsigned long); + v5 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile index 8ffc6383f836..e4d1e581dbae 100644 --- a/arch/loongarch/mm/Makefile +++ b/arch/loongarch/mm/Makefile @@ -7,3 +7,6 @@ obj-y += init.o cache.o tlb.o tlbex.o extable.o \ fault.o ioremap.o maccess.o mmap.o pgtable.o page.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_KASAN) += kasan_init.o + +KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c index 72685a48eaf0..6be04d36ca07 100644 --- a/arch/loongarch/mm/cache.c +++ b/arch/loongarch/mm/cache.c @@ -156,7 +156,6 @@ void cpu_cache_init(void) current_cpu_data.cache_leaves_present = leaf; current_cpu_data.options |= LOONGARCH_CPU_PREFETCH; - shm_align_mask = PAGE_SIZE - 1; } static const pgprot_t protection_map[16] = { diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index da5b6d518cdb..e6376e3dce86 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,8 @@ int show_unhandled_signals = 1; -static void __kprobes no_context(struct pt_regs *regs, unsigned long address) +static void __kprobes no_context(struct pt_regs *regs, + unsigned long write, unsigned long address) { const int field = sizeof(unsigned long) * 2; @@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address) if (fixup_exception(regs)) return; + if (kfence_handle_page_fault(address, write, regs)) + return; + /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. @@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address) die("Oops", regs); } -static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address) +static void __kprobes do_out_of_memory(struct pt_regs *regs, + unsigned long write, unsigned long address) { /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ if (!user_mode(regs)) { - no_context(regs, address); + no_context(regs, write, address); return; } pagefault_out_of_memory(); @@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs, { /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) { - no_context(regs, address); + no_context(regs, write, address); return; } @@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs, /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) { - no_context(regs, address); + no_context(regs, write, address); return; } @@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, */ if (address & __UA_LIMIT) { if (!user_mode(regs)) - no_context(regs, address); + no_context(regs, write, address); else do_sigsegv(regs, write, address, si_code); return; @@ -211,7 +217,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) - no_context(regs, address); + no_context(regs, write, address); return; } @@ -232,7 +238,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, if (unlikely(fault & VM_FAULT_ERROR)) { mmap_read_unlock(mm); if (fault & VM_FAULT_OOM) { - do_out_of_memory(regs, address); + do_out_of_memory(regs, write, address); return; } else if (fault & VM_FAULT_SIGSEGV) { do_sigsegv(regs, write, address, si_code); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 3b7d8129570b..f3fe8c06ba4d 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -35,33 +35,8 @@ #include #include -/* - * We have up to 8 empty zeroed pages so we can map one of the right colour - * when needed. Since page is never written to after the initialization we - * don't have to care about aliases on other CPUs. - */ -unsigned long empty_zero_page, zero_page_mask; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(zero_page_mask); - -void setup_zero_pages(void) -{ - unsigned int order, i; - struct page *page; - - order = 0; - - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Oh boy, that early out of memory?"); - - page = virt_to_page((void *)empty_zero_page); - split_page(page, order); - for (i = 0; i < (1 << order); i++, page++) - mark_page_reserved(page); - - zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; -} void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) @@ -106,7 +81,6 @@ void __init mem_init(void) high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); - setup_zero_pages(); /* Setup zeroed pages. */ } #endif /* !CONFIG_NUMA */ @@ -191,43 +165,42 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *al #endif #endif -static pte_t *fixmap_pte(unsigned long addr) +pte_t * __init populate_kernel_pte(unsigned long addr) { - pgd_t *pgd; - p4d_t *p4d; + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d = p4d_offset(pgd, addr); pud_t *pud; pmd_t *pmd; - pgd = pgd_offset_k(addr); - p4d = p4d_offset(pgd, addr); - - if (pgd_none(*pgd)) { - pud_t *new __maybe_unused; - - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - pgd_populate(&init_mm, pgd, new); + if (p4d_none(*p4d)) { + pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!pud) + panic("%s: Failed to allocate memory\n", __func__); + p4d_populate(&init_mm, p4d, pud); #ifndef __PAGETABLE_PUD_FOLDED - pud_init(new); + pud_init(pud); #endif } pud = pud_offset(p4d, addr); if (pud_none(*pud)) { - pmd_t *new __maybe_unused; - - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - pud_populate(&init_mm, pud, new); + pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!pmd) + panic("%s: Failed to allocate memory\n", __func__); + pud_populate(&init_mm, pud, pmd); #ifndef __PAGETABLE_PMD_FOLDED - pmd_init(new); + pmd_init(pmd); #endif } pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - pte_t *new __maybe_unused; + if (!pmd_present(*pmd)) { + pte_t *pte; - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - pmd_populate_kernel(&init_mm, pmd, new); + pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!pte) + panic("%s: Failed to allocate memory\n", __func__); + pmd_populate_kernel(&init_mm, pmd, pte); } return pte_offset_kernel(pmd, addr); @@ -241,7 +214,7 @@ void __init __set_fixmap(enum fixed_addresses idx, BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); - ptep = fixmap_pte(addr); + ptep = populate_kernel_pte(addr); if (!pte_none(*ptep)) { pte_ERROR(*ptep); return; diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c new file mode 100644 index 000000000000..da68bc1a4643 --- /dev/null +++ b/arch/loongarch/mm/kasan_init.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ +#define pr_fmt(fmt) "kasan: " fmt +#include +#include +#include + +#include +#include +#include + +static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); + +#ifdef __PAGETABLE_PUD_FOLDED +#define __p4d_none(early, p4d) (0) +#else +#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \ +(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud))) +#endif + +#ifdef __PAGETABLE_PMD_FOLDED +#define __pud_none(early, pud) (0) +#else +#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \ +(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd))) +#endif + +#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \ +(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte))) + +#define __pte_none(early, pte) (early ? pte_none(pte) : \ +((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page))) + +bool kasan_early_stage = true; + +/* + * Alloc memory for shadow memory page table. + */ +static phys_addr_t __init kasan_alloc_zeroed_page(int node) +{ + void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node); + if (!p) + panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n", + __func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS)); + + return __pa(p); +} + +static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early) +{ + if (__pmd_none(early, READ_ONCE(*pmdp))) { + phys_addr_t pte_phys = early ? + __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte)); + pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys)); + } + + return pte_offset_kernel(pmdp, addr); +} + +static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early) +{ + if (__pud_none(early, READ_ONCE(*pudp))) { + phys_addr_t pmd_phys = early ? + __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd)); + pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys)); + } + + return pmd_offset(pudp, addr); +} + +static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early) +{ + if (__p4d_none(early, READ_ONCE(*p4dp))) { + phys_addr_t pud_phys = early ? + __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud)); + p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys)); + } + + return pud_offset(p4dp, addr); +} + +static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early); + + do { + phys_addr_t page_phys = early ? + __pa_symbol(kasan_early_shadow_page) + : kasan_alloc_zeroed_page(node); + next = addr + PAGE_SIZE; + set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); + } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep))); +} + +static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early); + + do { + next = pmd_addr_end(addr, end); + kasan_pte_populate(pmdp, addr, next, node, early); + } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp))); +} + +static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early); + + do { + next = pud_addr_end(addr, end); + kasan_pmd_populate(pudp, addr, next, node, early); + } while (pudp++, addr = next, addr != end); +} + +static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + p4d_t *p4dp = p4d_offset(pgdp, addr); + + do { + next = p4d_addr_end(addr, end); + kasan_pud_populate(p4dp, addr, next, node, early); + } while (p4dp++, addr = next, addr != end); +} + +static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, + int node, bool early) +{ + unsigned long next; + pgd_t *pgdp; + + pgdp = pgd_offset_k(addr); + + do { + next = pgd_addr_end(addr, end); + kasan_p4d_populate(pgdp, addr, next, node, early); + } while (pgdp++, addr = next, addr != end); + +} + +/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */ +static void __init kasan_map_populate(unsigned long start, unsigned long end, + int node) +{ + kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false); +} + +asmlinkage void __init kasan_early_init(void) +{ + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); +} + +static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval) +{ + WRITE_ONCE(*pgdp, pgdval); +} + +static void __init clear_pgds(unsigned long start, unsigned long end) +{ + /* + * Remove references to kasan page tables from + * swapper_pg_dir. pgd_clear() can't be used + * here because it's nop on 2,3-level pagetable setups + */ + for (; start < end; start += PGDIR_SIZE) + kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0)); +} + +void __init kasan_init(void) +{ + u64 i; + phys_addr_t pa_start, pa_end; + + /* + * PGD was populated as invalid_pmd_table or invalid_pud_table + * in pagetable_init() which depends on how many levels of page + * table you are using, but we had to clean the gpd of kasan + * shadow memory, as the pgd value is none-zero. + * The assertion pgd_none is going to be false and the formal populate + * afterwards is not going to create any new pgd at all. + */ + memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir)); + csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH); + local_flush_tlb_all(); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + /* Maps everything to a single page of zeroes */ + kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true); + + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START), + kasan_mem_to_shadow((void *)KFENCE_AREA_END)); + + kasan_early_stage = false; + + /* Populate the linear mapping */ + for_each_mem_range(i, &pa_start, &pa_end) { + void *start = (void *)phys_to_virt(pa_start); + void *end = (void *)phys_to_virt(pa_end); + + if (start >= end) + break; + + kasan_map_populate((unsigned long)kasan_mem_to_shadow(start), + (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE); + } + + /* Populate modules mapping */ + kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR), + (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE); + /* + * KAsan may reuse the contents of kasan_early_shadow_pte directly, so we + * should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(&kasan_early_shadow_pte[i], + pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO)); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH); + local_flush_tlb_all(); + + /* At this point kasan is fully initialized. Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KernelAddressSanitizer initialized.\n"); +} diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index fbe1a4856fc4..a9630a81b38a 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -8,12 +8,11 @@ #include #include -unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ -EXPORT_SYMBOL(shm_align_mask); +#define SHM_ALIGN_MASK (SHMLBA - 1) -#define COLOUR_ALIGN(addr, pgoff) \ - ((((addr) + shm_align_mask) & ~shm_align_mask) + \ - (((pgoff) << PAGE_SHIFT) & shm_align_mask)) +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK) \ + + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK)) enum mmap_allocation_direction {UP, DOWN}; @@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, * cache aliasing constraints. */ if ((flags & MAP_SHARED) && - ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) + ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK)) return -EINVAL; return addr; } @@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, } info.length = len; - info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; + info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; info.align_offset = pgoff << PAGE_SHIFT; if (dir == DOWN) { diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index b14343e211b6..71d0539e2d0b 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -9,6 +9,18 @@ #include #include +struct page *dmw_virt_to_page(unsigned long kaddr) +{ + return pfn_to_page(virt_to_pfn(kaddr)); +} +EXPORT_SYMBOL_GPL(dmw_virt_to_page); + +struct page *tlb_virt_to_page(unsigned long kaddr) +{ + return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); +} +EXPORT_SYMBOL_GPL(tlb_virt_to_page); + pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *init, *ret = NULL; diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index a50308b6fc25..5c97d1463328 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Objects to go into the VDSO. +KASAN_SANITIZE := n +KCOV_INSTRUMENT := n + # Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 819b6bc8ac08..3df5499f7936 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -54,11 +54,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); +#ifndef __HAVE_ARCH_SHADOW_MAP static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } +#endif int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index f29aaaf2eb21..006e18decfad 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -108,6 +108,8 @@ extern const struct raid6_calls raid6_vpermxor1; extern const struct raid6_calls raid6_vpermxor2; extern const struct raid6_calls raid6_vpermxor4; extern const struct raid6_calls raid6_vpermxor8; +extern const struct raid6_calls raid6_lsx; +extern const struct raid6_calls raid6_lasx; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); @@ -123,6 +125,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2; extern const struct raid6_recov_calls raid6_recov_avx512; extern const struct raid6_recov_calls raid6_recov_s390xc; extern const struct raid6_recov_calls raid6_recov_neon; +extern const struct raid6_recov_calls raid6_recov_lsx; +extern const struct raid6_recov_calls raid6_recov_lasx; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 45e17619422b..035b0a4db476 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o +raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o hostprogs += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index a22a05c9af8a..0ec534faf019 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -73,6 +73,14 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_neonx2, &raid6_neonx1, #endif +#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_CPU_HAS_LASX + &raid6_lasx, +#endif +#ifdef CONFIG_CPU_HAS_LSX + &raid6_lsx, +#endif +#endif #if defined(__ia64__) &raid6_intx32, &raid6_intx16, @@ -103,6 +111,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { #endif #if defined(CONFIG_KERNEL_MODE_NEON) &raid6_recov_neon, +#endif +#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_CPU_HAS_LASX + &raid6_recov_lasx, +#endif +#ifdef CONFIG_CPU_HAS_LSX + &raid6_recov_lsx, +#endif #endif &raid6_recov_intx1, NULL diff --git a/lib/raid6/loongarch.h b/lib/raid6/loongarch.h new file mode 100644 index 000000000000..acfc33ce7056 --- /dev/null +++ b/lib/raid6/loongarch.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 WANG Xuerui + * + * raid6/loongarch.h + * + * Definitions common to LoongArch RAID-6 code only + */ + +#ifndef _LIB_RAID6_LOONGARCH_H +#define _LIB_RAID6_LOONGARCH_H + +#ifdef __KERNEL__ + +#include +#include + +#else /* for user-space testing */ + +#include + +/* have to supply these defines for glibc 2.37- and musl */ +#ifndef HWCAP_LOONGARCH_LSX +#define HWCAP_LOONGARCH_LSX (1 << 4) +#endif +#ifndef HWCAP_LOONGARCH_LASX +#define HWCAP_LOONGARCH_LASX (1 << 5) +#endif + +#define kernel_fpu_begin() +#define kernel_fpu_end() + +#define cpu_has_lsx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX) +#define cpu_has_lasx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX) + +#endif /* __KERNEL__ */ + +#endif /* _LIB_RAID6_LOONGARCH_H */ diff --git a/lib/raid6/loongarch_simd.c b/lib/raid6/loongarch_simd.c new file mode 100644 index 000000000000..aa5d9f924ca3 --- /dev/null +++ b/lib/raid6/loongarch_simd.c @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX) + * + * Copyright 2023 WANG Xuerui + * + * Based on the generic RAID-6 code (int.uc): + * + * Copyright 2002-2004 H. Peter Anvin + */ + +#include +#include "loongarch.h" + +/* + * The vector algorithms are currently priority 0, which means the generic + * scalar algorithms are not being disabled if vector support is present. + * This is like the similar LoongArch RAID5 XOR code, with the main reason + * repeated here: it cannot be ruled out at this point of time, that some + * future (maybe reduced) models could run the vector algorithms slower than + * the scalar ones, maybe for errata or micro-op reasons. It may be + * appropriate to revisit this after one or two more uarch generations. + */ + +#ifdef CONFIG_CPU_HAS_LSX +#define NSIZE 16 + +static int raid6_has_lsx(void) +{ + return cpu_has_lsx; +} + +static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $vr0, $vr1, $vr2, $vr3: wp + * $vr4, $vr5, $vr6, $vr7: wq + * $vr8, $vr9, $vr10, $vr11: wd + * $vr12, $vr13, $vr14, $vr15: w2 + * $vr16, $vr17, $vr18, $vr19: w1 + */ + for (d = 0; d < bytes; d += NSIZE*4) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); + asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); + asm volatile("vori.b $vr4, $vr0, 0"); + asm volatile("vori.b $vr5, $vr1, 0"); + asm volatile("vori.b $vr6, $vr2, 0"); + asm volatile("vori.b $vr7, $vr3, 0"); + for (z = z0-1; z >= 0; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); + asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); + asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("vxor.v $vr16, $vr16, $vr12"); + asm volatile("vxor.v $vr17, $vr17, $vr13"); + asm volatile("vxor.v $vr18, $vr18, $vr14"); + asm volatile("vxor.v $vr19, $vr19, $vr15"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr8"); + asm volatile("vxor.v $vr5, $vr17, $vr9"); + asm volatile("vxor.v $vr6, $vr18, $vr10"); + asm volatile("vxor.v $vr7, $vr19, $vr11"); + } + /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ + asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0])); + asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1])); + asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2])); + asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3])); + /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ + asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0])); + asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1])); + asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2])); + asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3])); + } + + kernel_fpu_end(); +} + +static void raid6_lsx_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $vr0, $vr1, $vr2, $vr3: wp + * $vr4, $vr5, $vr6, $vr7: wq + * $vr8, $vr9, $vr10, $vr11: wd + * $vr12, $vr13, $vr14, $vr15: w2 + * $vr16, $vr17, $vr18, $vr19: w1 + */ + for (d = 0; d < bytes; d += NSIZE*4) { + /* P/Q data pages */ + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); + asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); + asm volatile("vori.b $vr4, $vr0, 0"); + asm volatile("vori.b $vr5, $vr1, 0"); + asm volatile("vori.b $vr6, $vr2, 0"); + asm volatile("vori.b $vr7, $vr3, 0"); + for (z = z0-1; z >= start; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); + asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); + asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("vxor.v $vr16, $vr16, $vr12"); + asm volatile("vxor.v $vr17, $vr17, $vr13"); + asm volatile("vxor.v $vr18, $vr18, $vr14"); + asm volatile("vxor.v $vr19, $vr19, $vr15"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr8"); + asm volatile("vxor.v $vr5, $vr17, $vr9"); + asm volatile("vxor.v $vr6, $vr18, $vr10"); + asm volatile("vxor.v $vr7, $vr19, $vr11"); + } + + /* P/Q left side optimization */ + for (z = start-1; z >= 0; z--) { + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* wq$$ = w1$$ ^ w2$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr12"); + asm volatile("vxor.v $vr5, $vr17, $vr13"); + asm volatile("vxor.v $vr6, $vr18, $vr14"); + asm volatile("vxor.v $vr7, $vr19, $vr15"); + } + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + */ + asm volatile( + "vld $vr20, %0\n\t" + "vld $vr21, %1\n\t" + "vld $vr22, %2\n\t" + "vld $vr23, %3\n\t" + "vld $vr24, %4\n\t" + "vld $vr25, %5\n\t" + "vld $vr26, %6\n\t" + "vld $vr27, %7\n\t" + "vxor.v $vr20, $vr20, $vr0\n\t" + "vxor.v $vr21, $vr21, $vr1\n\t" + "vxor.v $vr22, $vr22, $vr2\n\t" + "vxor.v $vr23, $vr23, $vr3\n\t" + "vxor.v $vr24, $vr24, $vr4\n\t" + "vxor.v $vr25, $vr25, $vr5\n\t" + "vxor.v $vr26, $vr26, $vr6\n\t" + "vxor.v $vr27, $vr27, $vr7\n\t" + "vst $vr20, %0\n\t" + "vst $vr21, %1\n\t" + "vst $vr22, %2\n\t" + "vst $vr23, %3\n\t" + "vst $vr24, %4\n\t" + "vst $vr25, %5\n\t" + "vst $vr26, %6\n\t" + "vst $vr27, %7\n\t" + : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), + "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]), + "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]), + "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3]) + ); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_lsx = { + raid6_lsx_gen_syndrome, + raid6_lsx_xor_syndrome, + raid6_has_lsx, + "lsx", + .priority = 0 /* see the comment near the top of the file for reason */ +}; + +#undef NSIZE +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +#define NSIZE 32 + +static int raid6_has_lasx(void) +{ + return cpu_has_lasx; +} + +static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $xr0, $xr1: wp + * $xr2, $xr3: wq + * $xr4, $xr5: wd + * $xr6, $xr7: w2 + * $xr8, $xr9: w1 + */ + for (d = 0; d < bytes; d += NSIZE*2) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("xvori.b $xr2, $xr0, 0"); + asm volatile("xvori.b $xr3, $xr1, 0"); + for (z = z0-1; z >= 0; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("xvxor.v $xr8, $xr8, $xr6"); + asm volatile("xvxor.v $xr9, $xr9, $xr7"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr4"); + asm volatile("xvxor.v $xr3, $xr9, $xr5"); + } + /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ + asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0])); + asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1])); + /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ + asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0])); + asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1])); + } + + kernel_fpu_end(); +} + +static void raid6_lasx_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $xr0, $xr1: wp + * $xr2, $xr3: wq + * $xr4, $xr5: wd + * $xr6, $xr7: w2 + * $xr8, $xr9: w1 + */ + for (d = 0; d < bytes; d += NSIZE*2) { + /* P/Q data pages */ + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("xvori.b $xr2, $xr0, 0"); + asm volatile("xvori.b $xr3, $xr1, 0"); + for (z = z0-1; z >= start; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("xvxor.v $xr8, $xr8, $xr6"); + asm volatile("xvxor.v $xr9, $xr9, $xr7"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr4"); + asm volatile("xvxor.v $xr3, $xr9, $xr5"); + } + + /* P/Q left side optimization */ + for (z = start-1; z >= 0; z--) { + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* wq$$ = w1$$ ^ w2$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr6"); + asm volatile("xvxor.v $xr3, $xr9, $xr7"); + } + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + */ + asm volatile( + "xvld $xr10, %0\n\t" + "xvld $xr11, %1\n\t" + "xvld $xr12, %2\n\t" + "xvld $xr13, %3\n\t" + "xvxor.v $xr10, $xr10, $xr0\n\t" + "xvxor.v $xr11, $xr11, $xr1\n\t" + "xvxor.v $xr12, $xr12, $xr2\n\t" + "xvxor.v $xr13, $xr13, $xr3\n\t" + "xvst $xr10, %0\n\t" + "xvst $xr11, %1\n\t" + "xvst $xr12, %2\n\t" + "xvst $xr13, %3\n\t" + : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), + "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]) + ); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_lasx = { + raid6_lasx_gen_syndrome, + raid6_lasx_xor_syndrome, + raid6_has_lasx, + "lasx", + .priority = 0 /* see the comment near the top of the file for reason */ +}; +#undef NSIZE +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c new file mode 100644 index 000000000000..94aeac85e6f7 --- /dev/null +++ b/lib/raid6/recov_loongarch_simd.c @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX) + * + * Copyright (C) 2023 WANG Xuerui + * + * Originally based on recov_avx2.c and recov_ssse3.c: + * + * Copyright (C) 2012 Intel Corporation + * Author: Jim Kukunas + */ + +#include +#include "loongarch.h" + +/* + * Unlike with the syndrome calculation algorithms, there's no boot-time + * selection of recovery algorithms by benchmarking, so we have to specify + * the priorities and hope the future cores will all have decent vector + * support (i.e. no LASX slower than LSX, or even scalar code). + */ + +#ifdef CONFIG_CPU_HAS_LSX +static int raid6_has_lsx(void) +{ + return cpu_has_lsx; +} + +static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + /* + * vr20, vr21: qmul + * vr22, vr23: pbmul + */ + asm volatile("vld $vr20, %0" : : "m" (qmul[0])); + asm volatile("vld $vr21, %0" : : "m" (qmul[16])); + asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); + asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); + + while (bytes) { + /* vr4 - vr7: Q */ + asm volatile("vld $vr4, %0" : : "m" (q[0])); + asm volatile("vld $vr5, %0" : : "m" (q[16])); + asm volatile("vld $vr6, %0" : : "m" (q[32])); + asm volatile("vld $vr7, %0" : : "m" (q[48])); + /* vr4 - vr7: Q + Qxy */ + asm volatile("vld $vr8, %0" : : "m" (dq[0])); + asm volatile("vld $vr9, %0" : : "m" (dq[16])); + asm volatile("vld $vr10, %0" : : "m" (dq[32])); + asm volatile("vld $vr11, %0" : : "m" (dq[48])); + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + /* vr0 - vr3: P */ + asm volatile("vld $vr0, %0" : : "m" (p[0])); + asm volatile("vld $vr1, %0" : : "m" (p[16])); + asm volatile("vld $vr2, %0" : : "m" (p[32])); + asm volatile("vld $vr3, %0" : : "m" (p[48])); + /* vr0 - vr3: P + Pxy */ + asm volatile("vld $vr8, %0" : : "m" (dp[0])); + asm volatile("vld $vr9, %0" : : "m" (dp[16])); + asm volatile("vld $vr10, %0" : : "m" (dp[32])); + asm volatile("vld $vr11, %0" : : "m" (dp[48])); + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + + /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */ + asm volatile("vsrli.b $vr8, $vr4, 4"); + asm volatile("vsrli.b $vr9, $vr5, 4"); + asm volatile("vsrli.b $vr10, $vr6, 4"); + asm volatile("vsrli.b $vr11, $vr7, 4"); + /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */ + asm volatile("vandi.b $vr4, $vr4, 0x0f"); + asm volatile("vandi.b $vr5, $vr5, 0x0f"); + asm volatile("vandi.b $vr6, $vr6, 0x0f"); + asm volatile("vandi.b $vr7, $vr7, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4"); + asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5"); + asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6"); + asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7"); + /* lookup from qmul[16] */ + asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8"); + asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9"); + asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10"); + asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11"); + /* vr16 - vr19: B(Q + Qxy) */ + asm volatile("vxor.v $vr16, $vr8, $vr4"); + asm volatile("vxor.v $vr17, $vr9, $vr5"); + asm volatile("vxor.v $vr18, $vr10, $vr6"); + asm volatile("vxor.v $vr19, $vr11, $vr7"); + + /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */ + asm volatile("vsrli.b $vr4, $vr0, 4"); + asm volatile("vsrli.b $vr5, $vr1, 4"); + asm volatile("vsrli.b $vr6, $vr2, 4"); + asm volatile("vsrli.b $vr7, $vr3, 4"); + /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */ + asm volatile("vandi.b $vr12, $vr0, 0x0f"); + asm volatile("vandi.b $vr13, $vr1, 0x0f"); + asm volatile("vandi.b $vr14, $vr2, 0x0f"); + asm volatile("vandi.b $vr15, $vr3, 0x0f"); + /* lookup from pbmul[0] */ + asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12"); + asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13"); + asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14"); + asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15"); + /* lookup from pbmul[16] */ + asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4"); + asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5"); + asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6"); + asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7"); + /* vr4 - vr7: A(P + Pxy) */ + asm volatile("vxor.v $vr4, $vr4, $vr12"); + asm volatile("vxor.v $vr5, $vr5, $vr13"); + asm volatile("vxor.v $vr6, $vr6, $vr14"); + asm volatile("vxor.v $vr7, $vr7, $vr15"); + + /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */ + asm volatile("vxor.v $vr4, $vr4, $vr16"); + asm volatile("vxor.v $vr5, $vr5, $vr17"); + asm volatile("vxor.v $vr6, $vr6, $vr18"); + asm volatile("vxor.v $vr7, $vr7, $vr19"); + asm volatile("vst $vr4, %0" : "=m" (dq[0])); + asm volatile("vst $vr5, %0" : "=m" (dq[16])); + asm volatile("vst $vr6, %0" : "=m" (dq[32])); + asm volatile("vst $vr7, %0" : "=m" (dq[48])); + + /* vr0 - vr3: P + Pxy + Dx = Dy */ + asm volatile("vxor.v $vr0, $vr0, $vr4"); + asm volatile("vxor.v $vr1, $vr1, $vr5"); + asm volatile("vxor.v $vr2, $vr2, $vr6"); + asm volatile("vxor.v $vr3, $vr3, $vr7"); + asm volatile("vst $vr0, %0" : "=m" (dp[0])); + asm volatile("vst $vr1, %0" : "=m" (dp[16])); + asm volatile("vst $vr2, %0" : "=m" (dp[32])); + asm volatile("vst $vr3, %0" : "=m" (dp[48])); + + bytes -= 64; + p += 64; + q += 64; + dp += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + /* vr22, vr23: qmul */ + asm volatile("vld $vr22, %0" : : "m" (qmul[0])); + asm volatile("vld $vr23, %0" : : "m" (qmul[16])); + + while (bytes) { + /* vr0 - vr3: P + Dx */ + asm volatile("vld $vr0, %0" : : "m" (p[0])); + asm volatile("vld $vr1, %0" : : "m" (p[16])); + asm volatile("vld $vr2, %0" : : "m" (p[32])); + asm volatile("vld $vr3, %0" : : "m" (p[48])); + /* vr4 - vr7: Qx */ + asm volatile("vld $vr4, %0" : : "m" (dq[0])); + asm volatile("vld $vr5, %0" : : "m" (dq[16])); + asm volatile("vld $vr6, %0" : : "m" (dq[32])); + asm volatile("vld $vr7, %0" : : "m" (dq[48])); + /* vr4 - vr7: Q + Qx */ + asm volatile("vld $vr8, %0" : : "m" (q[0])); + asm volatile("vld $vr9, %0" : : "m" (q[16])); + asm volatile("vld $vr10, %0" : : "m" (q[32])); + asm volatile("vld $vr11, %0" : : "m" (q[48])); + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + + /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */ + asm volatile("vsrli.b $vr8, $vr4, 4"); + asm volatile("vsrli.b $vr9, $vr5, 4"); + asm volatile("vsrli.b $vr10, $vr6, 4"); + asm volatile("vsrli.b $vr11, $vr7, 4"); + /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */ + asm volatile("vandi.b $vr4, $vr4, 0x0f"); + asm volatile("vandi.b $vr5, $vr5, 0x0f"); + asm volatile("vandi.b $vr6, $vr6, 0x0f"); + asm volatile("vandi.b $vr7, $vr7, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4"); + asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5"); + asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6"); + asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7"); + /* lookup from qmul[16] */ + asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8"); + asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9"); + asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10"); + asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11"); + /* vr4 - vr7: qmul(Q + Qx) = Dx */ + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + asm volatile("vst $vr4, %0" : "=m" (dq[0])); + asm volatile("vst $vr5, %0" : "=m" (dq[16])); + asm volatile("vst $vr6, %0" : "=m" (dq[32])); + asm volatile("vst $vr7, %0" : "=m" (dq[48])); + + /* vr0 - vr3: P + Dx + Dx = P */ + asm volatile("vxor.v $vr0, $vr0, $vr4"); + asm volatile("vxor.v $vr1, $vr1, $vr5"); + asm volatile("vxor.v $vr2, $vr2, $vr6"); + asm volatile("vxor.v $vr3, $vr3, $vr7"); + asm volatile("vst $vr0, %0" : "=m" (p[0])); + asm volatile("vst $vr1, %0" : "=m" (p[16])); + asm volatile("vst $vr2, %0" : "=m" (p[32])); + asm volatile("vst $vr3, %0" : "=m" (p[48])); + + bytes -= 64; + p += 64; + q += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_lsx = { + .data2 = raid6_2data_recov_lsx, + .datap = raid6_datap_recov_lsx, + .valid = raid6_has_lsx, + .name = "lsx", + .priority = 1, +}; +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +static int raid6_has_lasx(void) +{ + return cpu_has_lasx; +} + +static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + /* + * xr20, xr21: qmul + * xr22, xr23: pbmul + */ + asm volatile("vld $vr20, %0" : : "m" (qmul[0])); + asm volatile("vld $vr21, %0" : : "m" (qmul[16])); + asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); + asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); + asm volatile("xvreplve0.q $xr20, $xr20"); + asm volatile("xvreplve0.q $xr21, $xr21"); + asm volatile("xvreplve0.q $xr22, $xr22"); + asm volatile("xvreplve0.q $xr23, $xr23"); + + while (bytes) { + /* xr0, xr1: Q */ + asm volatile("xvld $xr0, %0" : : "m" (q[0])); + asm volatile("xvld $xr1, %0" : : "m" (q[32])); + /* xr0, xr1: Q + Qxy */ + asm volatile("xvld $xr4, %0" : : "m" (dq[0])); + asm volatile("xvld $xr5, %0" : : "m" (dq[32])); + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* xr2, xr3: P */ + asm volatile("xvld $xr2, %0" : : "m" (p[0])); + asm volatile("xvld $xr3, %0" : : "m" (p[32])); + /* xr2, xr3: P + Pxy */ + asm volatile("xvld $xr4, %0" : : "m" (dp[0])); + asm volatile("xvld $xr5, %0" : : "m" (dp[32])); + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */ + asm volatile("xvsrli.b $xr4, $xr0, 4"); + asm volatile("xvsrli.b $xr5, $xr1, 4"); + /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */ + asm volatile("xvandi.b $xr0, $xr0, 0x0f"); + asm volatile("xvandi.b $xr1, $xr1, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0"); + asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1"); + /* lookup from qmul[16] */ + asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4"); + asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5"); + /* xr6, xr7: B(Q + Qxy) */ + asm volatile("xvxor.v $xr6, $xr4, $xr0"); + asm volatile("xvxor.v $xr7, $xr5, $xr1"); + + /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */ + asm volatile("xvsrli.b $xr4, $xr2, 4"); + asm volatile("xvsrli.b $xr5, $xr3, 4"); + /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */ + asm volatile("xvandi.b $xr0, $xr2, 0x0f"); + asm volatile("xvandi.b $xr1, $xr3, 0x0f"); + /* lookup from pbmul[0] */ + asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0"); + asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1"); + /* lookup from pbmul[16] */ + asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); + asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); + /* xr0, xr1: A(P + Pxy) */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + + /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */ + asm volatile("xvxor.v $xr0, $xr0, $xr6"); + asm volatile("xvxor.v $xr1, $xr1, $xr7"); + + /* xr2, xr3: P + Pxy + Dx = Dy */ + asm volatile("xvxor.v $xr2, $xr2, $xr0"); + asm volatile("xvxor.v $xr3, $xr3, $xr1"); + + asm volatile("xvst $xr0, %0" : "=m" (dq[0])); + asm volatile("xvst $xr1, %0" : "=m" (dq[32])); + asm volatile("xvst $xr2, %0" : "=m" (dp[0])); + asm volatile("xvst $xr3, %0" : "=m" (dp[32])); + + bytes -= 64; + p += 64; + q += 64; + dp += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + /* xr22, xr23: qmul */ + asm volatile("vld $vr22, %0" : : "m" (qmul[0])); + asm volatile("xvreplve0.q $xr22, $xr22"); + asm volatile("vld $vr23, %0" : : "m" (qmul[16])); + asm volatile("xvreplve0.q $xr23, $xr23"); + + while (bytes) { + /* xr0, xr1: P + Dx */ + asm volatile("xvld $xr0, %0" : : "m" (p[0])); + asm volatile("xvld $xr1, %0" : : "m" (p[32])); + /* xr2, xr3: Qx */ + asm volatile("xvld $xr2, %0" : : "m" (dq[0])); + asm volatile("xvld $xr3, %0" : : "m" (dq[32])); + /* xr2, xr3: Q + Qx */ + asm volatile("xvld $xr4, %0" : : "m" (q[0])); + asm volatile("xvld $xr5, %0" : : "m" (q[32])); + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */ + asm volatile("xvsrli.b $xr4, $xr2, 4"); + asm volatile("xvsrli.b $xr5, $xr3, 4"); + /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */ + asm volatile("xvandi.b $xr2, $xr2, 0x0f"); + asm volatile("xvandi.b $xr3, $xr3, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2"); + asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3"); + /* lookup from qmul[16] */ + asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); + asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); + /* xr2, xr3: qmul(Q + Qx) = Dx */ + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr0, xr1: P + Dx + Dx = P */ + asm volatile("xvxor.v $xr0, $xr0, $xr2"); + asm volatile("xvxor.v $xr1, $xr1, $xr3"); + + asm volatile("xvst $xr2, %0" : "=m" (dq[0])); + asm volatile("xvst $xr3, %0" : "=m" (dq[32])); + asm volatile("xvst $xr0, %0" : "=m" (p[0])); + asm volatile("xvst $xr1, %0" : "=m" (p[32])); + + bytes -= 64; + p += 64; + q += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_lasx = { + .data2 = raid6_2data_recov_lasx, + .datap = raid6_datap_recov_lasx, + .valid = raid6_has_lasx, + .name = "lasx", + .priority = 2, +}; +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 1f693ea3b980..2abe0076a636 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -41,6 +41,16 @@ ifeq ($(findstring ppc,$(ARCH)),ppc) gcc -c -x c - >/dev/null && rm ./-.o && echo yes) endif +ifeq ($(ARCH),loongarch64) + CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1 + CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' | \ + gcc -c -x assembler - >/dev/null 2>&1 && \ + rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1) + CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' | \ + gcc -c -x assembler - >/dev/null 2>&1 && \ + rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1) +endif + ifeq ($(IS_X86),yes) OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o CFLAGS += -DCONFIG_X86 @@ -54,6 +64,8 @@ else ifeq ($(HAS_ALTIVEC),yes) CFLAGS += -DCONFIG_ALTIVEC OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o +else ifeq ($(ARCH),loongarch64) + OBJS += loongarch_simd.o recov_loongarch_simd.o endif .c.o: diff --git a/mm/kasan/init.c b/mm/kasan/init.c index dcfec277e839..89895f38f722 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -139,6 +139,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr, return 0; } +void __weak __meminit pmd_init(void *addr) +{ +} + static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end) { @@ -166,8 +170,9 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr, if (!p) return -ENOMEM; } else { - pud_populate(&init_mm, pud, - early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); + pmd_init(p); + pud_populate(&init_mm, pud, p); } } zero_pmd_populate(pud, addr, next); @@ -176,6 +181,10 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr, return 0; } +void __weak __meminit pud_init(void *addr) +{ +} + static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end) { @@ -207,8 +216,9 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, if (!p) return -ENOMEM; } else { - p4d_populate(&init_mm, p4d, - early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); + pud_init(p); + p4d_populate(&init_mm, p4d, p); } } zero_pud_populate(p4d, addr, next); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 2e973b36fe07..f70e3d7a602e 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -291,16 +291,22 @@ struct kasan_stack_ring { #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) +#ifndef __HAVE_ARCH_SHADOW_MAP static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT); } +#endif static __always_inline bool addr_has_metadata(const void *addr) { +#ifdef __HAVE_ARCH_SHADOW_MAP + return (kasan_mem_to_shadow((void *)addr) != NULL); +#else return (kasan_reset_tag(addr) >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); +#endif } /** diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 96fd0411f5c5..3872528d0963 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -574,13 +574,14 @@ static void rcu_guarded_free(struct rcu_head *h) */ static unsigned long kfence_init_pool(void) { - unsigned long addr = (unsigned long)__kfence_pool; + unsigned long addr; struct page *pages; int i; if (!arch_kfence_init_pool()) - return addr; + return (unsigned long)__kfence_pool; + addr = (unsigned long)__kfence_pool; pages = virt_to_page(__kfence_pool); /*