Skip to content

Commit

Permalink
Merge pull request #16 from whentojump/llvm-trunk-next
Browse files Browse the repository at this point in the history
Patch v2
  • Loading branch information
chuckwolber authored Sep 5, 2024
2 parents a94bbc2 + b53cb39 commit 343ffbf
Show file tree
Hide file tree
Showing 13 changed files with 1,223 additions and 15 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/llvm-trunk-linux-mainline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ on:
- llvm-trunk
- llvm-trunk-next
paths:
- '.github/workflows/**'
- 'ci/**'
- 'patches/**'
- '.github/workflows/llvm-trunk-linux-mainline.yml'
- 'ci/linux-mainline/**'
- 'patches/v2.0/**'
- 'scripts/**'
workflow_dispatch:
schedule:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/llvm-trunk-linux-v5.15.153.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ on:
- llvm-trunk
- llvm-trunk-next
paths:
- '.github/workflows/**'
- 'ci/**'
- 'patches/**'
- '.github/workflows/llvm-trunk-linux-v5.15.153.yml'
- 'ci/linux-v5.15.153/**'
- 'patches/v0.6/**'
- 'scripts/**'
workflow_dispatch:
schedule:
Expand Down
20 changes: 14 additions & 6 deletions ci/linux-mainline/2_pull_source.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

repo=${1:-"xlab-uiuc/linux-mcdc"}
branch=${2:-"llvm-trunk-next"}
llvm_ref=${3:-"6b98a723653214a6cde05ae3cb5233af328ff101"}

echo $repo
echo $branch
Expand All @@ -16,22 +17,29 @@ kernel_latest_tag=v$(
)
echo $kernel_latest_tag
current_base=v$(
cat $MCDC_HOME/linux-mcdc/patches/v1.0/README.md | rev | cut -d ' ' -f 1 | cut -d . -f 2- | rev
cat $MCDC_HOME/linux-mcdc/patches/latest/README.md | rev | cut -d ' ' -f 1 | cut -d . -f 2- | rev
)
echo $current_base
if [[ "$kernel_latest_tag" != "$current_base" ]]; then
echo "There are updates in upstream. Patch v1.0 needs to be rebased."
echo "There are updates in upstream. "patches/latest/" likely needs to be rebased."
# Fail on purpose as likely we have to resolve some conflicts
exit 1
fi

# LLVM if we want to build it from source (optional)
git clone https://github.com/llvm/llvm-project.git --depth 5
git clone https://github.com/llvm/llvm-project.git
# Linux kernel
git clone https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch $kernel_latest_tag --depth 5
cp -r linux linux2

# Use a specific ref of LLVM
cd $MCDC_HOME/llvm-project
git checkout $llvm_ref

# Apply kernel patches
cd $MCDC_HOME/linux
git apply $MCDC_HOME/linux-mcdc/patches/v1.0/0001-llvm-cov-add-Clang-s-Source-based-Code-Coverage-supp.patch
git apply $MCDC_HOME/linux-mcdc/patches/v1.0/0002-kbuild-llvm-cov-disable-instrumentation-in-odd-or-se.patch
git apply $MCDC_HOME/linux-mcdc/patches/v1.0/0003-llvm-cov-add-Clang-s-MC-DC-support.patch
git apply $MCDC_HOME/linux-mcdc/patches/latest/0000-cover-letter.patch
git apply $MCDC_HOME/linux-mcdc/patches/latest/0001-llvm-cov-add-Clang-s-Source-based-Code-Coverage-supp.patch
git apply $MCDC_HOME/linux-mcdc/patches/latest/0002-llvm-cov-add-Clang-s-MC-DC-support.patch
git apply $MCDC_HOME/linux-mcdc/patches/latest/0003-x86-disable-llvm-cov-instrumentation.patch
git apply $MCDC_HOME/linux-mcdc/patches/latest/0004-x86-enable-llvm-cov-support.patch
32 changes: 32 additions & 0 deletions ci/linux-mainline/4_build_kernel.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,36 @@
#!/bin/bash

#### Let's see if vanilla kernel without the patch boots {

cd $MCDC_HOME/linux2
make LLVM=1 defconfig

./scripts/config -e CONFIG_9P_FS_POSIX_ACL
./scripts/config -e CONFIG_9P_FS
./scripts/config -e CONFIG_NET_9P_VIRTIO
./scripts/config -e CONFIG_NET_9P
./scripts/config -e CONFIG_PCI
./scripts/config -e CONFIG_VIRTIO_PCI
./scripts/config -e CONFIG_OVERLAY_FS
./scripts/config -e CONFIG_DEBUG_FS
./scripts/config -e CONFIG_CONFIGFS_FS
./scripts/config -e CONFIG_MAGIC_SYSRQ
make LLVM=1 olddefconfig

./scripts/config -e CONFIG_KUNIT
./scripts/config -e CONFIG_KUNIT_ALL_TESTS
# See https://github.com/xlab-uiuc/linux-mcdc/pull/10#issuecomment-2291603705
./scripts/config -d CONFIG_KUNIT_FAULT_TEST
make LLVM=1 olddefconfig

make LLVM=1 -j$(nproc) >& /dev/null

timeout 60 $MCDC_HOME/linux-mcdc/scripts/q -c "uname -a"

make LLVM=1 mrproper

##### } // Let's see if vanilla kernel without the patch boots

cd $MCDC_HOME/linux
make LLVM=1 defconfig

Expand All @@ -23,6 +54,7 @@ make LLVM=1 olddefconfig

./scripts/config -e CONFIG_LLVM_COV_KERNEL
./scripts/config -e CONFIG_LLVM_COV_KERNEL_MCDC
./scripts/config -e CONFIG_LLVM_COV_PROFILE_ALL
./scripts/config --set-val LLVM_COV_KERNEL_MCDC_MAX_CONDITIONS 44
make LLVM=1 olddefconfig

Expand Down
7 changes: 6 additions & 1 deletion ci/linux-v5.15.153/2_pull_source.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

repo=${1:-"xlab-uiuc/linux-mcdc"}
branch=${2:-"llvm-trunk-next"}
llvm_ref=${3:-"6b98a723653214a6cde05ae3cb5233af328ff101"}

echo $repo
echo $branch
Expand All @@ -11,10 +12,14 @@ cd $MCDC_HOME
# This meta repository
git clone https://github.com/$repo.git --branch $branch
# LLVM if we want to build it from source (optional)
git clone https://github.com/llvm/llvm-project.git --depth 5
git clone https://github.com/llvm/llvm-project.git
# Linux kernel
git clone https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git --branch v5.15.153 --depth 5

# Use a specific ref of LLVM
cd $MCDC_HOME/llvm-project
git checkout $llvm_ref

# Apply kernel patches
cd $MCDC_HOME/linux
git apply $MCDC_HOME/linux-mcdc/patches/v0.6/0001-llvm-cov-add-Clang-s-Source-based-Code-Coverage-supp.patch
Expand Down
14 changes: 12 additions & 2 deletions ci/linux-v5.15.153/5_boot_kernel_and_collect_coverage.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

guest_timeout=600

#
# Test the functionality of reset
#
Expand All @@ -11,7 +13,11 @@ GUEST_COMMANDS="$GUEST_COMMANDS; echo 1 > /sys/kernel/debug/llvm-cov/reset"
GUEST_COMMANDS="$GUEST_COMMANDS; cp /sys/kernel/debug/llvm-cov/profraw ."

cd $MCDC_HOME/linux
$MCDC_HOME/linux-mcdc/scripts/q -c "$GUEST_COMMANDS"
timeout $guest_timeout $MCDC_HOME/linux-mcdc/scripts/q -c "$GUEST_COMMANDS"
ret=$?
if [[ $ret -eq 124 ]]; then
exit 1
fi

file profraw |& tee /tmp/file.log
if ! grep "LLVM raw profile data, version 10" /tmp/file.log > /dev/null; then
Expand Down Expand Up @@ -45,7 +51,11 @@ GUEST_COMMANDS="$GUEST_COMMANDS; ls /sys/kernel/debug/llvm-cov"
GUEST_COMMANDS="$GUEST_COMMANDS; cp /sys/kernel/debug/llvm-cov/profraw ."

cd $MCDC_HOME/linux
$MCDC_HOME/linux-mcdc/scripts/q -c "$GUEST_COMMANDS"
timeout $guest_timeout $MCDC_HOME/linux-mcdc/scripts/q -c "$GUEST_COMMANDS"
ret=$?
if [[ $ret -eq 124 ]]; then
exit 1
fi

file profraw |& tee /tmp/file.log
if ! grep "LLVM raw profile data, version 10" /tmp/file.log > /dev/null; then
Expand Down
1 change: 1 addition & 0 deletions patches/latest
205 changes: 205 additions & 0 deletions patches/v2.0/0000-cover-letter.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
From 7c60851daae39815fa696fc2265e838b1913f7ac Mon Sep 17 00:00:00 2001
From: Wentao Zhang <[email protected]>
Date: Wed, 4 Sep 2024 18:13:07 -0500
Subject: [RFC PATCH 0/4] Enable measuring the kernel's Source-based Code Coverage and MC/DC with Clang

This series adds support for building x86-64 kernels with Clang's Source-
based Code Coverage[1] in order to facilitate Modified Condition/Decision
Coverage (MC/DC)[2] that provably correlates to source code for all levels
of compiler optimization.

The newly added kernel/llvm-cov/ directory complements the existing gcov
implementation. Gcov works at the object code level which may better
reflect actual execution. However, Gcov lacks the necessary information to
correlate coverage measurement with source code location when compiler
optimization level is non-zero (which is the default when building the
kernel). In addition, gcov reports are occasionally ambiguous when
attempting to compare with source code level developer intent.

In the following gcov example from drivers/firmware/dmi_scan.c, an
expression with four conditions is reported to have six branch outcomes,
which is not ideally informative in many safety related use cases, such as
automotive, medical, and aerospace.

5: 1068: if (s == e || *e != '/' || !month || month > 12) {
branch 0 taken 5 (fallthrough)
branch 1 taken 0
branch 2 taken 5 (fallthrough)
branch 3 taken 0
branch 4 taken 0 (fallthrough)
branch 5 taken 5

On the other hand, Clang's Source-based Code Coverage instruments at the
compiler frontend which maintains an accurate mapping from coverage
measurement to source code location. Coverage reports reflect exactly how
the code is written regardless of optimization and can present advanced
metrics like branch coverage and MC/DC in a clearer way. Coverage report
for the same snippet by llvm-cov would look as follows:

1068| 5| if (s == e || *e != '/' || !month || month > 12) {
------------------
| Branch (1068:6): [True: 0, False: 5]
| Branch (1068:16): [True: 0, False: 5]
| Branch (1068:29): [True: 0, False: 5]
| Branch (1068:39): [True: 0, False: 5]
------------------

Clang has added MC/DC support as of its 18.1.0 release. MC/DC is a fine-
grained coverage metric required by many automotive and aviation industrial
standards for certifying mission-critical software [3].

In the following example from arch/x86/events/probe.c, llvm-cov gives the
MC/DC measurement for the compound logic decision at line 43.

43| 12| if (msr[bit].test && !msr[bit].test(bit, data))
------------------
|---> MC/DC Decision Region (43:8) to (43:50)
|
| Number of Conditions: 2
| Condition C1 --> (43:8)
| Condition C2 --> (43:25)
|
| Executed MC/DC Test Vectors:
|
| C1, C2 Result
| 1 { T, F = F }
| 2 { T, T = T }
|
| C1-Pair: not covered
| C2-Pair: covered: (1,2)
| MC/DC Coverage for Decision: 50.00%
|
------------------
44| 5| continue;

As the results suggest, during the span of measurement, only condition C2
(!msr[bit].test(bit, data)) is covered. That means C2 was evaluated to both
true and false, and in those test vectors C2 affected the decision outcome
independently. Therefore MC/DC for this decision is 1 out of 2 (50.00%).

To do a full kernel measurement, instrument the kernel with
LLVM_COV_KERNEL_MCDC enabled, and optionally set a
LLVM_COV_KERNEL_MCDC_MAX_CONDITIONS value (the default is six). Run the
testsuites, and collect the raw profile data under
/sys/kernel/debug/llvm-cov/profraw. Such raw profile data can be merged and
indexed, and used for generating coverage reports in various formats.

$ cp /sys/kernel/debug/llvm-cov/profraw vmlinux.profraw
$ llvm-profdata merge vmlinux.profraw -o vmlinux.profdata
$ llvm-cov show --show-mcdc --show-mcdc-summary \
--format=text --use-color=false -output-dir=coverage_reports \
-instr-profile vmlinux.profdata vmlinux

The first two patches enable the llvm-cov infrastructure, where the first
enables source based code coverage and the second adds MC/DC support. The
next patch disables instrumentation for curve25519-x86_64.c for the same
reason as gcov. The final patch enables coverage for x86-64.

The choice to use a new Makefile variable LLVM_COV_PROFILE, instead of
reusing GCOV_PROFILE, was deliberate. More work needs to be done to
determine if it is appropriate to reuse the same flag. In addition, given
the fundamentally different approaches to instrumentation and the resulting
variation in coverage reports, there is a strong likelihood that coverage
type will need to be managed separately.

This work reuses code from a previous effort by Sami Tolvanen et al. [4].
Our aim is for source-based *code coverage* required for high assurance
(MC/DC) while [4] focused more on performance optimization.

This initial submission is restricted to x86-64. Support for other
architectures would need a bit more Makefile & linker script modification.
Informally we've confirmed that arm64 works and more are being tested.

Note that Source-based Code Coverage is Clang-specific and isn't compatible
with Clang's gcov support in kernel/gcov/. Currently, kernel/gcov/ is not
able to measure MC/DC without modifying CFLAGS_GCOV and it would face the
same issues in terms of source correlation as gcov in general does.

Some demo and results can be found in [5]. We will talk about this patch
series in the Refereed Track at LPC 2024 [6].

Known Limitations:

Kernel code with logical expressions exceeding
LVM_COV_KERNEL_MCDC_MAX_CONDITIONS will produce a compiler warning.
Expressions with up to 47 conditions are found in the Linux kernel source
tree (as of v6.11), but 46 seems to be the max value before the build fails
due to kernel size. As of LLVM 19 the max number of conditions possible is
32767.

As of LLVM 19, certain expressions are still not covered, and will produce
build warnings when they are encountered:

"[...] if a boolean expression is embedded in the nest of another boolean
expression but separated by a non-logical operator, this is also not
supported. For example, in x = (a && b && c && func(d && f)), the d && f
case starts a new boolean expression that is separated from the other
conditions by the operator func(). When this is encountered, a warning
will be generated and the boolean expression will not be
instrumented." [7]


[1] https://clang.llvm.org/docs/SourceBasedCodeCoverage.html
[2] https://en.wikipedia.org/wiki/Modified_condition%2Fdecision_coverage
[3] https://digital-library.theiet.org/content/journals/10.1049/sej.1994.0025
[4] https://lore.kernel.org/lkml/[email protected]/
[5] https://github.com/xlab-uiuc/linux-mcdc
[6] https://lpc.events/event/18/contributions/1718/
[7] https://clang.llvm.org/docs/SourceBasedCodeCoverage.html#mc-dc-instrumentation

---
v1 -> v2:

* Rebased onto v6.11-rc6 from v6.11-rc4.
* llvm-cov/Kconfig: add CONFIG_LLVM_COV_PROFILE_ALL and be consistent with
GCOV conventions.
* Makefile.lib: let the default of LLVM_COV_PROFILE depend on
CONFIG_LLVM_COV_PROFILE_ALL instead of always being "y", and use
$(is-kernel-object) following 9c2d1328f88a "kbuild: provide reasonable
defaults for tool coverage".
* asm-generic/vmlinux.lds.h: use macro BOUNDED_SECTION_POST_LABEL following
9b351be25360 "vmlinux.lds.h: add BOUNDED_SECTION* macros".
* Makefile.lib: "basetarget" -> "target-stem" following bf48d9b756b9
"kbuild: change tool coverage variables to take the path relative to
$(obj)".
* kernel/trace: remove "LLVM_COV_PROFILE = :n" and let user manually enable
it with LLVM_COV_PROFILE_ALL, if they are sure of the performance impact.
* x86, kbuild: remove "LLVM_COV_PROFILE = :n" instances that have been
taken care of by $is-kernel-object. Also remove the one in
arch/x86/platform/efi/ as llvm-cov doesn't have const propagation issues
as gcov did (ac6119e7f25b "efi/x86: Disable instrumentation in the EFI
runtime handling code").
* modfinal, kbuild: remove "LLVM_COV_PROFILE = :n" as llvm-cov doesn't use
function pointers so it doesn't have CFI-related issues like gcov did
(25a21fbb934a "kbuild: Disable GCOV for *.mod.o").

---

Wentao Zhang (4):
llvm-cov: add Clang's Source-based Code Coverage support
llvm-cov: add Clang's MC/DC support
x86: disable llvm-cov instrumentation
x86: enable llvm-cov support

Makefile | 9 ++
arch/Kconfig | 1 +
arch/x86/Kconfig | 2 +
arch/x86/crypto/Makefile | 3 +-
arch/x86/kernel/vmlinux.lds.S | 2 +
include/asm-generic/vmlinux.lds.h | 36 +++++
kernel/Makefile | 1 +
kernel/llvm-cov/Kconfig | 100 ++++++++++++
kernel/llvm-cov/Makefile | 5 +
kernel/llvm-cov/fs.c | 253 ++++++++++++++++++++++++++++++
kernel/llvm-cov/llvm-cov.h | 156 ++++++++++++++++++
scripts/Makefile.lib | 23 +++
scripts/mod/modpost.c | 2 +
13 files changed, 592 insertions(+), 1 deletion(-)
create mode 100644 kernel/llvm-cov/Kconfig
create mode 100644 kernel/llvm-cov/Makefile
create mode 100644 kernel/llvm-cov/fs.c
create mode 100644 kernel/llvm-cov/llvm-cov.h

--
2.45.2

Loading

0 comments on commit 343ffbf

Please sign in to comment.