Skip to content

Commit

Permalink
command: Start of propagating OpenTelemetry context
Browse files Browse the repository at this point in the history
Several times over the years we've considered adding tracing
instrumentation to Terraform, since even when running in isolation as a
CLI program it has a "distributed system-like" structure, with lots of
concurrent internal work and also some work delegated to provider plugins
that are essentially temporarily-running microservices.

However, it's always felt a bit overwhelming to do it because much of
Terraform predates the Go context.Context idiom and so it's tough to get
a clean chain of context.Context values all the way down the stack without
disturbing a lot of existing APIs.

This commit aims to just get that process started by establishing how a
context can propagate from "package main" into the command package,
focusing initially on "terraform init" and some other commands that share
some underlying functions with that command.

OpenTelemetry has emerged as a de-facto industry standard and so this uses
its API directly, without any attempt to hide it behind an abstraction.
The OpenTelemetry API is itself already an adapter layer, so we should be
able to swap in any backend that uses comparable concepts. For now we just
discard the tracing reports by default, and allow users to opt in to
delivering traces over OTLP by setting an environment variable when
running Terraform (the environment variable was established in an earlier
commit, so this commit builds on that.)

When tracing collection is enabled, every Terraform CLI run will generate
at least one overall span representing the command that was run. Some
commands might also create child spans, but most currently do not.
  • Loading branch information
apparentlymart committed Jul 14, 2023
1 parent 503896a commit 7bee77b
Show file tree
Hide file tree
Showing 13 changed files with 159 additions and 45 deletions.
11 changes: 10 additions & 1 deletion checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
package main

import (
"context"
"fmt"
"log"
"path/filepath"

"github.com/hashicorp/go-checkpoint"
"github.com/hashicorp/terraform/internal/command"
"github.com/hashicorp/terraform/internal/command/cliconfig"
"go.opentelemetry.io/otel/codes"
)

func init() {
Expand All @@ -21,14 +23,18 @@ var checkpointResult chan *checkpoint.CheckResponse

// runCheckpoint runs a HashiCorp Checkpoint request. You can read about
// Checkpoint here: https://github.com/hashicorp/go-checkpoint.
func runCheckpoint(c *cliconfig.Config) {
func runCheckpoint(ctx context.Context, c *cliconfig.Config) {
// If the user doesn't want checkpoint at all, then return.
if c.DisableCheckpoint {
log.Printf("[INFO] Checkpoint disabled. Not running.")
checkpointResult <- nil
return
}

ctx, span := tracer.Start(ctx, "HashiCorp Checkpoint")
_ = ctx // prevent staticcheck from complaining to avoid a maintenence hazard of having the wrong ctx in scope here
defer span.End()

configDir, err := cliconfig.ConfigDir()
if err != nil {
log.Printf("[ERR] Checkpoint setup error: %s", err)
Expand All @@ -55,7 +61,10 @@ func runCheckpoint(c *cliconfig.Config) {
})
if err != nil {
log.Printf("[ERR] Checkpoint error: %s", err)
span.SetStatus(codes.Error, err.Error())
resp = nil
} else {
span.SetStatus(codes.Ok, "checkpoint request succeeded")
}

checkpointResult <- resp
Expand Down
8 changes: 5 additions & 3 deletions commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
package main

import (
"context"
"os"
"os/signal"

"github.com/mitchellh/cli"

"github.com/hashicorp/go-plugin"
svchost "github.com/hashicorp/terraform-svchost"
"github.com/hashicorp/terraform-svchost/auth"
"github.com/hashicorp/terraform-svchost/disco"
"github.com/mitchellh/cli"

"github.com/hashicorp/terraform/internal/addrs"
"github.com/hashicorp/terraform/internal/command"
Expand Down Expand Up @@ -53,6 +53,7 @@ var HiddenCommands map[string]struct{}
var Ui cli.Ui

func initCommands(
ctx context.Context,
originalWorkingDir string,
streams *terminal.Streams,
config *cliconfig.Config,
Expand Down Expand Up @@ -101,7 +102,8 @@ func initCommands(

PluginCacheMayBreakDependencyLockFile: config.PluginCacheMayBreakDependencyLockFile,

ShutdownCh: makeShutdownCh(),
ShutdownCh: makeShutdownCh(),
CallerContext: ctx,

ProviderSource: providerSrc,
ProviderDevOverrides: providerDevOverrides,
Expand Down
11 changes: 8 additions & 3 deletions internal/command/get.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package command

import (
"context"
"fmt"
"strings"

Expand All @@ -28,6 +29,10 @@ func (c *GetCommand) Run(args []string) int {
return 1
}

// Initialization can be aborted by interruption signals
ctx, done := c.InterruptibleContext(c.CommandContext())
defer done()

path, err := ModulePath(cmdFlags.Args())
if err != nil {
c.Ui.Error(err.Error())
Expand All @@ -36,7 +41,7 @@ func (c *GetCommand) Run(args []string) int {

path = c.normalizePath(path)

abort, diags := getModules(&c.Meta, path, update)
abort, diags := getModules(ctx, &c.Meta, path, update)
c.showDiagnostics(diags)
if abort || diags.HasErrors() {
return 1
Expand Down Expand Up @@ -76,10 +81,10 @@ func (c *GetCommand) Synopsis() string {
return "Install or upgrade remote Terraform modules"
}

func getModules(m *Meta, path string, upgrade bool) (abort bool, diags tfdiags.Diagnostics) {
func getModules(ctx context.Context, m *Meta, path string, upgrade bool) (abort bool, diags tfdiags.Diagnostics) {
hooks := uiModuleInstallHooks{
Ui: m.Ui,
ShowLocalPaths: true,
}
return m.installModules(path, upgrade, hooks)
return m.installModules(ctx, path, upgrade, hooks)
}
56 changes: 41 additions & 15 deletions internal/command/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ import (
svchost "github.com/hashicorp/terraform-svchost"
"github.com/posener/complete"
"github.com/zclconf/go-cty/cty"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"

"github.com/hashicorp/terraform/internal/addrs"
"github.com/hashicorp/terraform/internal/backend"
Expand Down Expand Up @@ -107,6 +110,10 @@ func (c *InitCommand) Run(args []string) int {
return 1
}

// Initialization can be aborted by interruption signals
ctx, done := c.InterruptibleContext(c.CommandContext())
defer done()

// This will track whether we outputted anything so that we know whether
// to output a newline before the success message
var header bool
Expand Down Expand Up @@ -134,12 +141,19 @@ func (c *InitCommand) Run(args []string) int {
ShowLocalPaths: false, // since they are in a weird location for init
}

initDirFromModuleAbort, initDirFromModuleDiags := c.initDirFromModule(path, src, hooks)
ctx, span := tracer.Start(ctx, "-from-module=...", trace.WithAttributes(
attribute.String("module_source", src),
))

initDirFromModuleAbort, initDirFromModuleDiags := c.initDirFromModule(ctx, path, src, hooks)
diags = diags.Append(initDirFromModuleDiags)
if initDirFromModuleAbort || initDirFromModuleDiags.HasErrors() {
c.showDiagnostics(diags)
span.SetStatus(codes.Error, "module installation failed")
span.End()
return 1
}
span.End()

c.Ui.Output("")
}
Expand Down Expand Up @@ -181,12 +195,12 @@ func (c *InitCommand) Run(args []string) int {

switch {
case flagCloud && rootModEarly.CloudConfig != nil:
back, backendOutput, backDiags = c.initCloud(rootModEarly, flagConfigExtra)
back, backendOutput, backDiags = c.initCloud(ctx, rootModEarly, flagConfigExtra)
case flagBackend:
back, backendOutput, backDiags = c.initBackend(rootModEarly, flagConfigExtra)
back, backendOutput, backDiags = c.initBackend(ctx, rootModEarly, flagConfigExtra)
default:
// load the previously-stored backend config
back, backDiags = c.Meta.backendFromState()
back, backDiags = c.Meta.backendFromState(ctx)
}
if backendOutput {
header = true
Expand Down Expand Up @@ -219,7 +233,7 @@ func (c *InitCommand) Run(args []string) int {
}

if flagGet {
modsOutput, modsAbort, modsDiags := c.getModules(path, rootModEarly, flagUpgrade)
modsOutput, modsAbort, modsDiags := c.getModules(ctx, path, rootModEarly, flagUpgrade)
diags = diags.Append(modsDiags)
if modsAbort || modsDiags.HasErrors() {
c.showDiagnostics(diags)
Expand Down Expand Up @@ -287,7 +301,7 @@ func (c *InitCommand) Run(args []string) int {
}

// Now that we have loaded all modules, check the module tree for missing providers.
providersOutput, providersAbort, providerDiags := c.getProviders(config, state, flagUpgrade, flagPluginPath, flagLockfile)
providersOutput, providersAbort, providerDiags := c.getProviders(ctx, config, state, flagUpgrade, flagPluginPath, flagLockfile)
diags = diags.Append(providerDiags)
if providersAbort || providerDiags.HasErrors() {
c.showDiagnostics(diags)
Expand Down Expand Up @@ -328,7 +342,7 @@ func (c *InitCommand) Run(args []string) int {
return 0
}

func (c *InitCommand) getModules(path string, earlyRoot *configs.Module, upgrade bool) (output bool, abort bool, diags tfdiags.Diagnostics) {
func (c *InitCommand) getModules(ctx context.Context, path string, earlyRoot *configs.Module, upgrade bool) (output bool, abort bool, diags tfdiags.Diagnostics) {
testModules := false // We can also have modules buried in test files.
for _, file := range earlyRoot.Tests {
for _, run := range file.Runs {
Expand All @@ -343,6 +357,11 @@ func (c *InitCommand) getModules(path string, earlyRoot *configs.Module, upgrade
return false, false, nil
}

ctx, span := tracer.Start(ctx, "install modules", trace.WithAttributes(
attribute.Bool("upgrade", upgrade),
))
defer span.End()

if upgrade {
c.Ui.Output(c.Colorize().Color("[reset][bold]Upgrading modules..."))
} else {
Expand All @@ -354,7 +373,7 @@ func (c *InitCommand) getModules(path string, earlyRoot *configs.Module, upgrade
ShowLocalPaths: true,
}

installAbort, installDiags := c.installModules(path, upgrade, hooks)
installAbort, installDiags := c.installModules(ctx, path, upgrade, hooks)
diags = diags.Append(installDiags)

// At this point, installModules may have generated error diags or been
Expand All @@ -377,7 +396,11 @@ func (c *InitCommand) getModules(path string, earlyRoot *configs.Module, upgrade
return true, installAbort, diags
}

func (c *InitCommand) initCloud(root *configs.Module, extraConfig rawFlags) (be backend.Backend, output bool, diags tfdiags.Diagnostics) {
func (c *InitCommand) initCloud(ctx context.Context, root *configs.Module, extraConfig rawFlags) (be backend.Backend, output bool, diags tfdiags.Diagnostics) {
ctx, span := tracer.Start(ctx, "initialize Terraform Cloud")
_ = ctx // prevent staticcheck from complaining to avoid a maintenence hazard of having the wrong ctx in scope here
defer span.End()

c.Ui.Output(c.Colorize().Color("\n[reset][bold]Initializing Terraform Cloud..."))

if len(extraConfig.AllItems()) != 0 {
Expand All @@ -401,7 +424,11 @@ func (c *InitCommand) initCloud(root *configs.Module, extraConfig rawFlags) (be
return back, true, diags
}

func (c *InitCommand) initBackend(root *configs.Module, extraConfig rawFlags) (be backend.Backend, output bool, diags tfdiags.Diagnostics) {
func (c *InitCommand) initBackend(ctx context.Context, root *configs.Module, extraConfig rawFlags) (be backend.Backend, output bool, diags tfdiags.Diagnostics) {
ctx, span := tracer.Start(ctx, "initialize backend")
_ = ctx // prevent staticcheck from complaining to avoid a maintenence hazard of having the wrong ctx in scope here
defer span.End()

c.Ui.Output(c.Colorize().Color("\n[reset][bold]Initializing the backend..."))

var backendConfig *configs.Backend
Expand Down Expand Up @@ -482,7 +509,10 @@ the backend configuration is present and valid.

// Load the complete module tree, and fetch any missing providers.
// This method outputs its own Ui.
func (c *InitCommand) getProviders(config *configs.Config, state *states.State, upgrade bool, pluginDirs []string, flagLockfile string) (output, abort bool, diags tfdiags.Diagnostics) {
func (c *InitCommand) getProviders(ctx context.Context, config *configs.Config, state *states.State, upgrade bool, pluginDirs []string, flagLockfile string) (output, abort bool, diags tfdiags.Diagnostics) {
ctx, span := tracer.Start(ctx, "install providers")
defer span.End()

// Dev overrides cause the result of "terraform init" to be irrelevant for
// any overridden providers, so we'll warn about it to avoid later
// confusion when Terraform ends up using a different provider than the
Expand Down Expand Up @@ -540,10 +570,6 @@ func (c *InitCommand) getProviders(config *configs.Config, state *states.State,
log.Printf("[DEBUG] will search for provider plugins in %s", pluginDirs)
}

// Installation can be aborted by interruption signals
ctx, done := c.InterruptibleContext()
defer done()

// We want to print out a nice warning if we don't manage to pull
// checksums for all our providers. This is tracked via callbacks
// and incomplete providers are stored here for later analysis.
Expand Down
45 changes: 43 additions & 2 deletions internal/command/meta.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,15 @@ type Meta struct {
// web browser.
BrowserLauncher webbrowser.Launcher

// A context.Context provided by the caller -- typically "package main" --
// which might be carrying telemetry-related metadata and so should be
// used when creating downstream traces, etc.
//
// This isn't guaranteed to be set, so use [Meta.CommandContext] to
// safely create a context for the entire execution of a command, which
// will be connected to this parent context if it's present.
CallerContext context.Context

// When this channel is closed, the command will be cancelled.
ShutdownCh <-chan struct{}

Expand Down Expand Up @@ -393,12 +402,23 @@ func (m *Meta) StdinPiped() bool {
// InterruptibleContext returns a context.Context that will be cancelled
// if the process is interrupted by a platform-specific interrupt signal.
//
// The typical way to use this is to pass the result of [Meta.CommandContext]
// as the base context, but that's appropriate only if the interruptible
// context is being created directly inside the "Run" method of a particular
// command, to create a context representing the entire remaining runtime of
// that command:
//
// As usual with cancelable contexts, the caller must always call the given
// cancel function once all operations are complete in order to make sure
// that the context resources will still be freed even if there is no
// interruption.
func (m *Meta) InterruptibleContext() (context.Context, context.CancelFunc) {
base := context.Background()
//
// // This example is only for when using this function very early in
// // the "Run" method of a Command implementation. If you already have
// // an active context, pass that in as base instead.
// ctx, done := c.InterruptibleContext(c.CommandContext())
// defer done()
func (m *Meta) InterruptibleContext(base context.Context) (context.Context, context.CancelFunc) {
if m.ShutdownCh == nil {
// If we're running in a unit testing context without a shutdown
// channel populated then we'll return an uncancelable channel.
Expand All @@ -417,6 +437,27 @@ func (m *Meta) InterruptibleContext() (context.Context, context.CancelFunc) {
return ctx, cancel
}

// CommandContext returns the "root context" to use in the main Run function
// of a command.
//
// This method is just a substitute for passing a context directly to the
// "Run" method of a command, which we can't do because that API is owned by
// mitchellh/cli rather than by Terraform. Use this only in situations
// comparable to the context having been passed in as an argument to Run.
//
// If the caller (e.g. "package main") provided a context when it instantiated
// the Meta then the returned context will inherit all of its values, deadlines,
// etc. If the caller did not provide a context then the result is an inert
// background context ready to be passed to other functions.
func (m *Meta) CommandContext() context.Context {
if m.CallerContext == nil {
return context.Background()
}
// We just return the caller context directly for now, since we don't
// have anything to add to it.
return m.CallerContext
}

// RunOperation executes the given operation on the given backend, blocking
// until that operation completes or is interrupted, and then returns
// the RunningOperation object representing the completed or
Expand Down
8 changes: 4 additions & 4 deletions internal/command/meta_backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -766,10 +766,10 @@ func (m *Meta) determineInitReason(previousBackendType string, currentBackendTyp
}

// backendFromState returns the initialized (not configured) backend directly
// from the state. This should be used only when a user runs `terraform init
// -backend=false`. This function returns a local backend if there is no state
// or no backend configured.
func (m *Meta) backendFromState() (backend.Backend, tfdiags.Diagnostics) {
// from the backend state. This should be used only when a user runs
// `terraform init -backend=false`. This function returns a local backend if
// there is no backend state or no backend configured.
func (m *Meta) backendFromState(ctx context.Context) (backend.Backend, tfdiags.Diagnostics) {
var diags tfdiags.Diagnostics
// Get the path to where we store a local cache of backend configuration
// if we're using a remote backend. This may not yet exist which means
Expand Down
3 changes: 2 additions & 1 deletion internal/command/meta_backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package command

import (
"context"
"io/ioutil"
"os"
"path/filepath"
Expand Down Expand Up @@ -1939,7 +1940,7 @@ func TestBackendFromState(t *testing.T) {
// them to match just for this test.
wd.OverrideDataDir(".")

stateBackend, diags := m.backendFromState()
stateBackend, diags := m.backendFromState(context.Background())
if diags.HasErrors() {
t.Fatal(diags.Err())
}
Expand Down
Loading

0 comments on commit 7bee77b

Please sign in to comment.