Caddy源码阅读之 Caddyfile 解析器

本文记录 Caddy 是如何解析 Caddyfile 的。了解了 Caddyfile 的解析过程,也能帮助我们更好地理解 Caddy 如何组合、管理各种 module

  • 先给一个 Caddyfile 文件示例:
  (common) {
      encode zstd gzip
  }

  :8080 {
      import common
      root * /www/j2do
      file_server browse
  }

  :8081 {
      import common
      root * /www/j2do
      file_server browse
  }
  • 首先我们通过跟踪代码找到 Caddyfile 文件解析的入口:
  caddyfile.Parse (parse.go:51) github.com/caddyserver/caddy/v2/caddyconfig/caddyfile
  caddyfile.Adapter.Adapt (adapter.go:45) github.com/caddyserver/caddy/v2/caddyconfig/caddyfile
  caddycmd.loadConfig (main.go:176) github.com/caddyserver/caddy/v2/cmd
  caddycmd.cmdRun (commandfuncs.go:205) github.com/caddyserver/caddy/v2/cmd
  caddycmd.Main (main.go:84) github.com/caddyserver/caddy/v2/cmd
  main.main (main.go:37) main

通过上面的堆栈很容易找到 caddyconfig/caddyfile/adapter.go,它实际上就是 Caddy 解析 Caddyfile 的核心代码文件;其中调用的 Parse 函数(位于同目录的 parse.go)即为解析入口

  • Parse 对 Caddyfile 文件进行结构化解析,并返回 []ServerBlock(ServerBlock 结构体切片)
  // filename 就是 Caddyfile 源文件,body是Caddyfile的内容
  // 第一步将文本转为结构体
	serverBlocks, err := Parse(filename, body)
	if err != nil {
		return nil, nil, err
	}
  • ServerBlock 结构体分析
  // ServerBlock is one top-level block of a Caddyfile: the keys that
  // precede the block together with the directive segments inside it.
  type ServerBlock struct {
    // NOTE(review): presumably true when the block body was written
    // inside `{}` — confirm against the parser.
    HasBraces bool
    // Keys are the tokens that introduce the block, e.g. ":8080".
    Keys []string
    // Segments holds one Segment per directive found in the block.
    Segments []Segment
  }

  // Segment is the group of tokens that makes up a single directive;
  // its first token carries the directive name.
  type Segment []Token

  // Token represents a single word of the Caddyfile; words are
  // separated by whitespace.
  // For example, `host:123 { directive }` is turned into:
  // []Token{
  //     {Line: 1, Text: "host:123"},
  //     {Line: 1, Text: "{"},
  //     {Line: 1, Text: "directive"},
  //     {Line: 1, Text: "}"},
  // }
  type Token struct {
    // File is the file the token comes from.
    File string
    // Line is the line number on which the token appears.
    Line int
    // Text is the token's literal text.
    Text string
    // inSnippet and snippetName record which snippet, if any, the
    // token was defined in.
    inSnippet   bool
    snippetName string
  }
  • Parse 内部实现是通过 allTokens 函数将 Caddyfile 文件纯文本转为基于单词的 Token 结构体数组,方便操作,再通过 parseAll 解析为多组 ServerBlock
  // Parse turns the raw Caddyfile text in input into server blocks.
  // It first converts the text into tokens with allTokens (filename is
  // passed along with the input), then builds a parser over those tokens
  // and returns whatever parseAll produces. A tokenizing error is returned
  // unchanged together with a nil slice.
  func Parse(filename string, input []byte) ([]ServerBlock, error) {
    toks, lexErr := allTokens(filename, input)
    if lexErr != nil {
      return nil, lexErr
    }

    // The parser reads the tokens through a Dispenser; converting tokens
    // into server blocks is done by the parser itself.
    dispenser := NewDispenser(toks)

    // The import graph starts out with no nodes and no edges.
    graph := importGraph{
      nodes: make(map[string]bool),
      edges: make(adjacency),
    }

    p := parser{Dispenser: dispenser, importGraph: graph}
    return p.parseAll()
  }

allTokens 完成了文本到结构体([]Token)的转化; 对文本处理的核心函数在 caddyconfig/caddyfile/lexer.go 文件中的 next()函数; 有兴趣可以看下函数实现,该函数逻辑如下:

  // 首先完成配置文件中的环境变量替换,例如:`Caddyfile` 文件中的 {$FOOBAR} 将被替换为 `foobar`
  os.Setenv("FOOBAR", "foobar")
  {
      input:  "foo {$FOOBAR} bar",
      expect: "foo foobar bar",
  },
  // 然后通过next函数进行文本字符的遍历解析

  // next 函数核心就是查找一个有效的Token
  // Token 通常以空白符进行分割,遇到下面情况需要特殊处理:
  // 1. 以`"`开头,直到以`"`结尾,并且忽略`"`,支持转义`\"`被记录为`"`,其他字符均不支持转义
  // 2. 遇到`#`则忽略后面的字符,直到下一行
  // 正确解析出了Token返回true,否则返回false
  // next loads the next token into the lexer.
  // A token is delimited by whitespace, unless
  // the token starts with a quotes character (")
  // in which case the token goes until the closing
  // quotes (the enclosing quotes are not included).
  // Inside quoted strings, quotes may be escaped
  // with a preceding \ character. No other chars
  // may be escaped. The rest of the line is skipped
  // if a "#" character is read in. Returns true if
  // a token was loaded; false otherwise.
  • 再通过 parseAll 函数遍历所有的 Token 依次解析出一组组的 ServerBlock

parseAll 会调用 parseOne 依次解析出 ServerBlock, parseOne直接调用 begin 函数,所以从 Token 转换到 ServerBlock关键逻辑在 begin函数

  // begin parses the start of one server block. It reads the block's keys
  // via p.addresses(), then either records the block as a snippet
  // definition or parses its body via p.blockContents(). It returns nil
  // immediately when the parser holds no tokens, or when EOF is reached
  // right after the addresses.
  func (p *parser) begin() error {
    if len(p.tokens) == 0 {
      return nil
    }
    // Parse the block's keys and move to where the block, i.e. `{}`, starts.
    // For example, the key of `localhost:8080 {}` is `localhost:8080`, and
    // what is defined inside the braces are the directives.
    err := p.addresses()

    if err != nil {
      return err
    }

    if p.eof {
      // this happens if the Caddyfile consists of only
      // a line of addresses and nothing else
      return nil
    }
    // A snippet block is a single key with parens. Nothing else qualifies.
    // For instance `(common)` in the sample Caddyfile; later blocks can
    // import it.
    if ok, name := p.isSnippet(); ok {
      if p.definedSnippets == nil {
        p.definedSnippets = map[string][]Token{}
      }
      if _, found := p.definedSnippets[name]; found {
        return p.Errf("redeclaration of previously declared snippet %s", name)
      }
      // consume all tokens til matched close brace
      tokens, err := p.snippetTokens()
      if err != nil {
        return err
      }
      // Just as we need to track which file the token comes from, we need to
      // keep track of which snippets do the tokens come from. This is helpful
      // in tracking import cycles across files/snippets by namespacing them. Without
      // this we end up with false-positives in cycle-detection.
      for k, v := range tokens {
        v.inSnippet = true
        v.snippetName = name
        tokens[k] = v
      }
      p.definedSnippets[name] = tokens
      // empty block keys so we don't save this block as a real server.
      p.block.Keys = nil
      return nil
    }
    // blockContents parses all the directives inside the braces.
    return p.blockContents()
  }
  • blockContents 的核心实现在 directives函数
  // directives parses through all the lines for directives
  // and it expects the next token to be the first
  // directive. It goes until EOF or closing curly brace
  // which ends the server block. It returns the first error
  // reported by doImport or directive, or nil otherwise.
  func (p *parser) directives() error {
    for p.Next() {
      // end of server block
      if p.Val() == "}" {
        // p.nesting has already been decremented
        break
      }
      // special case: import directive replaces tokens during parse-time
      if p.Val() == "import" {
        err := p.doImport()
        if err != nil {
          return err
        }
        p.cursor-- // cursor is advanced when we continue, so roll back one more
        continue
      }
      // normal case: parse a directive as a new segment
      // (a "segment" is a line which starts with a directive
      // and which ends at the end of the line or at the end of
      // the block that is opened at the end of the line; it can be
      // thought of as one piece of the block, made up of a group of Tokens)
      if err := p.directive(); err != nil {
        return err
      }
    }

    return nil
  }
  • 经过以上步骤,Caddyfile 文件最终被解析为如下结构:
  // 伪代码
  []ServerBlock{
    {
      Keys: ":8080",
      Segments: []Segment{
        {
          []Token:{
            { Text: "encode" },
            { Text: "zstd" },
            { Text: "gzip" },
          }
        },
        {
          []Token:{
            { Text: "root" },
            { Text: "*" },
            { Text: "/www/j2do" },
          }
        },
        {
          []Token:{
            { Text: "file_server" },
            { Text: "browse" },
          }
        },
      }
    },{
      Keys: ":8081",
      Segments: []Segment{
        {
          []Token:{
            { Text: "encode" },
            { Text: "zstd" },
            { Text: "gzip" },
          }
        },
        {
          []Token:{
            { Text: "root" },
            { Text: "*" },
            { Text: "/www/j2do" },
          }
        },
        {
          []Token:{
            { Text: "file_server" },
            { Text: "browse" },
          }
        },
      }
    },
  }
  • 接下来通过 Setup 将 []ServerBlock 解析为 caddy.Config 结构体

Setup 定义的是一个接口,真正实现在 caddyconfig/httpcaddyfile/httptype.go 文件中

	cfg, warnings, err := a.ServerType.Setup(serverBlocks, options)
	if err != nil {
		return nil, warnings, err
	}

Setup 核心流程如下:

  1. 全局参数合并,相关全局配置参考 https://caddyserver.com/docs/caddyfile/options
  2. 占位符替换,具体值可参考 https://caddyserver.com/docs/caddyfile/concepts#placeholders
  3. 接下来依次将 ServerBlock 转换为 caddy.Config,分析这段代码的过程也能学习到 Caddy 如何组织 module
  // sb.block.Segments 即对应 `ServerBlock` 结构体下的 `Segments`
  for _, segment := range sb.block.Segments {
    // 获取指令,即获取Segment的第一个元素的Text
    // if len(s) > 0 {
		//  return s[0].Text
	  // }
    dir := segment.Directive()

    if strings.HasPrefix(dir, matcherPrefix) {
      // matcher definitions were pre-processed
      continue
    }
    // 这里是关键,从已注册的指令中查找对应的处理函数,所有的指令均在启动时通过init函数注册
    dirFunc, ok := registeredDirectives[dir]
    if !ok {
      // 找不到即报错
      tkn := segment[0]
      message := "%s:%d: unrecognized directive: %s"
      if !sb.block.HasBraces {
        message += "\nDid you mean to define a second site? If so, you must use curly braces around each site to separate their configurations."
      }
      return nil, warnings, fmt.Errorf(message, tkn.File, tkn.Line, dir)
    }

    h := Helper{
      Dispenser:    caddyfile.NewDispenser(segment),
      options:      options,
      warnings:     &warnings,
      matcherDefs:  matcherDefs,
      parentBlock:  sb.block,
      groupCounter: gc,
      State:        state,
    }
    // 调用对应的处理函数,将 `Segments` 转为 `[]ConfigValue`, 到此即完成了 Caddyfile 配置文件到内部使用的json结构体的转换
    // 例如:
    // ConfigValue {
    //   Value: caddyhttp.Route{
    //     HandlersRaw: []string{
    //       `{"handler":"vars","root":"/www/j2do"}`
    //     }
    //   }
    // }
    // ConfigValue {
    //   Value: caddyhttp.Route{
    //     HandlersRaw: []string{
    //       `{"encodings":{"gzip":{},"zstd":{}},"handler":"encode","prefer":["zstd","gzip"]}`
    //     },
    //   }
    // }
    results, err := dirFunc(h)
    if err != nil {
      return nil, warnings, fmt.Errorf("parsing caddyfile tokens for '%s': %v", dir, err)
    }
    // 保存解析结果
    for _, result := range results {
      sb.pile[result.Class] = append(sb.pile[result.Class], result)
    }
  }
  // 接下来 Caddy 会根据解析出来的配置构建 `caddyhttp.Server` 实例配置,具体处理函数即 `serversFromPairings`
  // `serversFromPairings` 检查运行 http server的关键配置,并对一些缺省配置添加默认值
  // pairings 参数即上面 result 结果
  // each pairing of listener addresses to list of server
  // blocks is basically a server definition
  servers, err := st.serversFromPairings(pairings, options, &warnings, gc)
  if err != nil {
    return nil, warnings, err
  }
  // 将所有的servers放在一个http app结构体中
  // now that each server is configured, make the HTTP app
  httpApp := caddyhttp.App{
    HTTPPort:    tryInt(options["http_port"], &warnings),
    HTTPSPort:   tryInt(options["https_port"], &warnings),
    GracePeriod: tryDuration(options["grace_period"], &warnings),
    Servers:     servers,
  }

  // 并且最终所有的配置会放在一个完整 `caddy.Config` 配置中
  cfg := &caddy.Config{AppsRaw: make(caddy.ModuleMap)}
  cfg.AppsRaw["http"] = caddyconfig.JSON(httpApp, &warnings)
  // 到此配置初始化完毕
  return cfg

特别注意:上面的代码只保留了核心部分,完整代码中还包含 https 处理,日志处理等关键逻辑

  • 接下来看下 Caddy 如何去组织 module,也即上文用到的 registeredDirectives[dir] 中的数据是如何初始化的

Posts in this Series