From 313d0e2b1f51832f6b3c0575461b85b5da2822e9 Mon Sep 17 00:00:00 2001 From: Alex Buchanan Date: Wed, 13 Nov 2024 12:52:39 -0800 Subject: [PATCH 1/2] fix: attach all logs on chart failure --- internal/cmd/local/local/install.go | 75 +++++++++++++--------- internal/cmd/local/local/install_test.go | 2 +- internal/cmd/local/local/log_utils.go | 18 ------ internal/cmd/local/local/log_utils_test.go | 32 +++++---- internal/cmd/local/localerr/localerr.go | 5 ++ 5 files changed, 70 insertions(+), 62 deletions(-) diff --git a/internal/cmd/local/local/install.go b/internal/cmd/local/local/install.go index 870c468..8d91237 100644 --- a/internal/cmd/local/local/install.go +++ b/internal/cmd/local/local/install.go @@ -2,6 +2,7 @@ package local import ( "context" + "errors" "fmt" "net/http" "os" @@ -253,7 +254,9 @@ func (c *Command) Install(ctx context.Context, opts *InstallOpts) error { }); err != nil { // if trace.SpanError isn't called here, the logs attached // in the diagnoseAirbyteChartFailure method are lost - return trace.SpanError(span, c.diagnoseAirbyteChartFailure(ctx, err)) + err = c.diagnoseAirbyteChartFailure(ctx, err) + err = fmt.Errorf("unable to install airbyte chart: %w", err) + return trace.SpanError(span, err) } nginxValues, err := helm.BuildNginxValues(c.portHTTP) @@ -318,46 +321,54 @@ func (c *Command) Install(ctx context.Context, opts *InstallOpts) error { } func (c *Command) diagnoseAirbyteChartFailure(ctx context.Context, chartErr error) error { - if podList, err := c.k8s.PodList(ctx, common.AirbyteNamespace); err == nil { - var errors []string - for _, pod := range podList.Items { - pterm.Debug.Println(fmt.Sprintf("looking at %s\n %s(%s)", pod.Name, pod.Status.Phase, pod.Status.Reason)) - if pod.Status.Phase != corev1.PodFailed { - continue - } - - msg := "unknown" + if errors.Is(ctx.Err(), context.Canceled) { + return chartErr + } + + podList, err := c.k8s.PodList(ctx, common.AirbyteNamespace) + if err != nil { + return chartErr + } - logs, err := c.k8s.LogsGet(ctx, common.AirbyteNamespace, pod.Name) - if err != nil { - msg = "unknown: failed to get pod logs." - } - - preview := logs - if len(preview) > 50 { - preview = preview[:50] - } - pterm.Debug.Println("found logs: ", preview) + var failedPods []string + for _, pod := range podList.Items { + if pod.Status.Phase == corev1.PodFailed { + failedPods = append(failedPods, pod.Name) + } + } - trace.AttachLog(fmt.Sprintf("%s.log", pod.Name), logs) + // If none of the pods failed, don't bother looking at logs. + if len(failedPods) == 0 { + return chartErr + } - m, err := getLastLogError(strings.NewReader(logs)) - if err != nil { - msg = "unknown: failed to find error log." - } - if m != "" { - msg = m - } + for _, pod := range podList.Items { + // skip pods that aren't part of the platform release (e.g. job pods) + if !strings.HasPrefix(pod.Name, common.AirbyteChartRelease) { + continue + } + pterm.Debug.Printfln("looking at %s\n %s(%s)", pod.Name, pod.Status.Phase, pod.Status.Reason) - errors = append(errors, fmt.Sprintf("pod %s: %s", pod.Name, msg)) + logs, err := c.k8s.LogsGet(ctx, common.AirbyteNamespace, pod.Name) + if err != nil { + pterm.Debug.Printfln("failed to get pod logs: %s", err) + continue } - if errors != nil { - return fmt.Errorf("unable to install airbyte chart:\n%s", strings.Join(errors, "\n")) + preview := logs + if len(preview) > 50 { + preview = preview[:50] } + pterm.Debug.Println("found logs: ", preview) + + trace.AttachLog(fmt.Sprintf("%s.log", pod.Name), logs) + } + + if len(failedPods) == 1 && failedPods[0] == common.AirbyteBootloaderPodName { + return localerr.ErrBootloaderFailed } - return fmt.Errorf("unable to install airbyte chart: %w", chartErr) + return chartErr } func (c *Command) handleIngress(ctx context.Context, hosts []string) error { diff --git a/internal/cmd/local/local/install_test.go b/internal/cmd/local/local/install_test.go index cb8ac2c..2998b6d 100644 --- a/internal/cmd/local/local/install_test.go +++ b/internal/cmd/local/local/install_test.go @@ -227,7 +227,7 @@ func TestCommand_InstallError(t *testing.T) { t.Fatal(err) } err = c.Install(context.Background(), installOpts) - expect := "unable to install airbyte chart:\npod test-pod-1: unknown" + expect := "unable to install airbyte chart: unable to install helm: test error" if expect != err.Error() { t.Errorf("expected %q but got %q", expect, err) } diff --git a/internal/cmd/local/local/log_utils.go b/internal/cmd/local/local/log_utils.go index ad6a30f..0d491b1 100644 --- a/internal/cmd/local/local/log_utils.go +++ b/internal/cmd/local/local/log_utils.go @@ -59,21 +59,3 @@ func (j *logScanner) Scan() bool { func (j *logScanner) Err() error { return j.scanner.Err() } - -func getLastLogError(r io.Reader) (string, error) { - var lines []logLine - s := newLogScanner(r) - for s.Scan() { - lines = append(lines, s.line) - } - if s.Err() != nil { - return "", s.Err() - } - - for i := len(lines) - 1; i >= 0; i-- { - if lines[i].level == "ERROR" { - return lines[i].msg, nil - } - } - return "", nil -} diff --git a/internal/cmd/local/local/log_utils_test.go b/internal/cmd/local/local/log_utils_test.go index 6cc0e2c..365a716 100644 --- a/internal/cmd/local/local/log_utils_test.go +++ b/internal/cmd/local/local/log_utils_test.go @@ -22,6 +22,27 @@ Caused by: io.airbyte.db.check.DatabaseCheckException: Unable to connect to the 2024-09-12T15:56:33.125352208Z Thread-4 INFO Loading mask data from '/seed/specs_secrets_mask.yaml `) +var testLogs2 = strings.TrimSpace(` +2024-11-12 20:52:07,403 [main] ERROR i.a.d.c.DatabaseAvailabilityCheck(lambda$isDatabaseConnected$1):78 - Failed to verify database connection. +org.jooq.exception.DataAccessException: Error getting connection from data source HikariDataSource (HikariPool-1) + at org.jooq_3.19.7.POSTGRES.debug(Unknown Source) + at org.jooq.impl.DataSourceConnectionProvider.acquire(DataSourceConnectionProvider.java:90) +Caused by: java.sql.SQLTransientConnectionException: HikariPool-1 - Connection is not available, request timed out after 30110ms (total=0, active=0, idle=0, waiting=0) + at com.zaxxer.hikari.pool.HikariPool.createTimeoutException(HikariPool.java:686) + at com.zaxxer.hikari.pool.HikariPool.getConnection(HikariPool.java:179) + at com.zaxxer.hikari.pool.HikariPool.getConnection(HikariPool.java:144) + at com.zaxxer.hikari.HikariDataSource.getConnection(HikariDataSource.java:99) + at org.jooq.impl.DataSourceConnectionProvider.acquire(DataSourceConnectionProvider.java:87) + ... 22 common frames omitted +Caused by: org.postgresql.util.PSQLException: The connection attempt failed. + at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:364) + at org.postgresql.core.ConnectionFactory.openConnection(ConnectionFactory.java:54) +Caused by: java.net.UnknownHostException: airbyte-db-svc + at java.base/sun.nio.ch.NioSocketImpl.connect(NioSocketImpl.java:567) + at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:268) + ... 14 common frames omitted +`) + func TestJavaLogScanner(t *testing.T) { s := newLogScanner(strings.NewReader(testLogs)) @@ -47,14 +68,3 @@ func TestJavaLogScanner(t *testing.T) { expectLogLine("INFO", "i.m.r.Micronaut(lambda$start$0):118 - Embedded Application shutting down") expectLogLine("INFO", "2024-09-12T15:56:33.125352208Z Thread-4 INFO Loading mask data from '/seed/specs_secrets_mask.yaml") } - -func TestLastErrorLog(t *testing.T) { - l, err := getLastLogError(strings.NewReader(testLogs)) - if err != nil { - t.Errorf("unexpected error %s", err) - } - expect := "Caused by: io.airbyte.db.check.DatabaseCheckException: Unable to connect to the database." - if l != expect { - t.Errorf("expected %q but got %q", expect, l) - } -} diff --git a/internal/cmd/local/localerr/localerr.go b/internal/cmd/local/localerr/localerr.go index ce144eb..f126c9b 100644 --- a/internal/cmd/local/localerr/localerr.go +++ b/internal/cmd/local/localerr/localerr.go @@ -80,4 +80,9 @@ IP addresses won't work. Ports won't work (e.g. example:8000). URLs won't work ( By default, abctl will allow access from any hostname or IP, so you might not need the --host flag.`, } + + ErrBootloaderFailed = &LocalError{ + msg: "bootloader failed", + help: "The bootloader failed to its initialization checks or migrations. Try running again with --verbose to see the full bootloader logs.", + } ) From 33a74916e88ebf2539d894c35ad2da9746da58ab Mon Sep 17 00:00:00 2001 From: Alex Buchanan Date: Wed, 13 Nov 2024 13:05:29 -0800 Subject: [PATCH 2/2] remove unused test log --- internal/cmd/local/local/log_utils_test.go | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/internal/cmd/local/local/log_utils_test.go b/internal/cmd/local/local/log_utils_test.go index 365a716..a16f646 100644 --- a/internal/cmd/local/local/log_utils_test.go +++ b/internal/cmd/local/local/log_utils_test.go @@ -22,27 +22,6 @@ Caused by: io.airbyte.db.check.DatabaseCheckException: Unable to connect to the 2024-09-12T15:56:33.125352208Z Thread-4 INFO Loading mask data from '/seed/specs_secrets_mask.yaml `) -var testLogs2 = strings.TrimSpace(` -2024-11-12 20:52:07,403 [main] ERROR i.a.d.c.DatabaseAvailabilityCheck(lambda$isDatabaseConnected$1):78 - Failed to verify database connection. -org.jooq.exception.DataAccessException: Error getting connection from data source HikariDataSource (HikariPool-1) - at org.jooq_3.19.7.POSTGRES.debug(Unknown Source) - at org.jooq.impl.DataSourceConnectionProvider.acquire(DataSourceConnectionProvider.java:90) -Caused by: java.sql.SQLTransientConnectionException: HikariPool-1 - Connection is not available, request timed out after 30110ms (total=0, active=0, idle=0, waiting=0) - at com.zaxxer.hikari.pool.HikariPool.createTimeoutException(HikariPool.java:686) - at com.zaxxer.hikari.pool.HikariPool.getConnection(HikariPool.java:179) - at com.zaxxer.hikari.pool.HikariPool.getConnection(HikariPool.java:144) - at com.zaxxer.hikari.HikariDataSource.getConnection(HikariDataSource.java:99) - at org.jooq.impl.DataSourceConnectionProvider.acquire(DataSourceConnectionProvider.java:87) - ... 22 common frames omitted -Caused by: org.postgresql.util.PSQLException: The connection attempt failed. - at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:364) - at org.postgresql.core.ConnectionFactory.openConnection(ConnectionFactory.java:54) -Caused by: java.net.UnknownHostException: airbyte-db-svc - at java.base/sun.nio.ch.NioSocketImpl.connect(NioSocketImpl.java:567) - at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:268) - ... 14 common frames omitted -`) - func TestJavaLogScanner(t *testing.T) { s := newLogScanner(strings.NewReader(testLogs))