kubernetes 无法使用Terraform为AKS群集创建命名空间,报告没有此类主机

p4tfgftt  于 2022-11-28  发布在  Kubernetes
关注(0)|答案(4)|浏览(127)

我有一个模块定义如下:

provider.tf

# Configures the Kubernetes provider from the first kube_config entry exported by
# the azurerm_kubernetes_cluster.aks resource declared in this same module.
# NOTE(review): these attributes presumably only become known once the cluster has
# been applied — confirm that a provider block may depend on them within one apply.
provider "kubernetes" {
  #load_config_file = "false"
  host                   = azurerm_kubernetes_cluster.aks.kube_config.0.host
  username               = azurerm_kubernetes_cluster.aks.kube_config.0.username
  password               = azurerm_kubernetes_cluster.aks.kube_config.0.password
  # The certificate/key attributes are passed through base64decode() before use.
  client_certificate     = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate)
  client_key             = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_key)
  cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate)
}

output.tf

# Values exported by this module; every one is read from azurerm_kubernetes_cluster.aks.
# NOTE(review): several of these (kubeconfig, client key, certificates) look like
# credentials — consider marking them `sensitive = true`; confirm against the
# Terraform version in use.
output "node_resource_group" {
  value       = azurerm_kubernetes_cluster.aks.node_resource_group
  description = "The name of resource group where the AKS Nodes are created"
}
# Raw kubeconfig of the cluster. The "kube_config" output further down exports the same attribute.
output "kubeConfig" {
  value = azurerm_kubernetes_cluster.aks.kube_config_raw
  description = "Kubeconfig of AKS Cluster"
}

# API server host taken from the first kube_config entry.
output "host" {
  value = azurerm_kubernetes_cluster.aks.kube_config.0.host
}

# Client key as exposed by the resource (not passed through base64decode here,
# unlike in the provider block).
output "client_key" {
  value = azurerm_kubernetes_cluster.aks.kube_config.0.client_key
}

# Client certificate as exposed by the resource (no base64decode applied here).
output "client_certificate" {
  value = azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate
}

# Same value as the "kubeConfig" output above (kube_config_raw).
output "kube_config" {
  value = azurerm_kubernetes_cluster.aks.kube_config_raw
}

# Cluster CA certificate as exposed by the resource (no base64decode applied here).
output "cluster_ca_certificate" {
  value = azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate
}

main.tf

# Log Analytics workspace named "<tla>-la-<environment code>-<identifier>", created in
# the location and resource group returned by data.azurerm_resource_group.rg.
# NOTE(review): the cluster's oms_agent block points at
# data.azurerm_log_analytics_workspace.log_analytics, not at this workspace —
# confirm which one is intended.
resource "azurerm_log_analytics_workspace" "law" {
  name                = "${var.tla}-la-${local.lookup_result}-${var.identifier}"
  location            = data.azurerm_resource_group.rg.location
  resource_group_name = data.azurerm_resource_group.rg.name
  sku                 = var.la_sku
  retention_in_days   = 30
}

# The AKS cluster itself, named "<tla>-aks-<environment code>-<identifier>" and placed
# in the location/resource group returned by data.azurerm_resource_group.rg.
resource "azurerm_kubernetes_cluster" "aks" {
  name                    = "${var.tla}-aks-${local.lookup_result}-${var.identifier}"
  location                = data.azurerm_resource_group.rg.location
  resource_group_name     = data.azurerm_resource_group.rg.name
  dns_prefix              = var.dns_prefix
  kubernetes_version      = var.kubernetes_version
  sku_tier                = var.sku_tier
  # Driven by var.enable_private_cluster, whose declared default is true.
  private_cluster_enabled = var.enable_private_cluster
  #api_server_authorized_ip_ranges = ""
  # System node pool: auto-scaling is always on, with count bounds, pod limit,
  # VM size and disk size all supplied through variables.
  default_node_pool {
    name                  = "syspool001"
    orchestrator_version  = var.orchestrator_version
    availability_zones    = var.agents_availability_zones
    enable_auto_scaling   = true
    node_count            = var.default_pool_node_count
    max_count             = var.default_pool_max_node_count
    min_count             = var.default_pool_min_node_count
    max_pods              = var.default_pool_max_pod_count
    vm_size               = var.agents_size
    enable_node_public_ip = false
    os_disk_size_gb       = var.default_pool_os_disk_size_gb
    type                  = "VirtualMachineScaleSets"
    vnet_subnet_id        = var.vnet_subnet_id
    node_labels           = var.agents_labels
    # Node pool tags are the module-wide tags overlaid with var.agents_tags.
    tags                  = merge(local.tags, var.agents_tags)
  }

  network_profile {
    network_plugin     = var.network_plugin
    network_policy     = var.network_policy
    dns_service_ip     = var.net_profile_dns_service_ip
    docker_bridge_cidr = var.net_profile_docker_bridge_cidr
    service_cidr       = var.net_profile_service_cidr
  }

  # RBAC enabled with managed Azure AD integration; admin groups come from
  # var.rbac_aad_admin_group_object_ids.
  role_based_access_control {
    enabled = true
    azure_active_directory {
      managed                = true
      admin_group_object_ids = var.rbac_aad_admin_group_object_ids
    }
  }

  identity {
    type = "SystemAssigned"
  }

  addon_profile {
    azure_policy {
      enabled = true
    }

    http_application_routing {
      enabled = false
    }

    # Uses the workspace looked up by the log_analytics data source, not the
    # azurerm_log_analytics_workspace.law resource created in this module.
    oms_agent {
      enabled                    = true
      log_analytics_workspace_id = data.azurerm_log_analytics_workspace.log_analytics.id
    }
  }

  tags = local.tags

  # Differences in the default_node_pool block are ignored after creation.
  lifecycle {
    ignore_changes = [
      default_node_pool
    ]
  }

}

# One additional node pool per entry of var.additional_node_pools, attached to the
# cluster declared as azurerm_kubernetes_cluster.aks.
resource "azurerm_kubernetes_cluster_node_pool" "aksnp" {
  # node_count differences are ignored after creation.
  lifecycle {
    ignore_changes = [
      node_count
    ]
  }
  for_each              = var.additional_node_pools
  kubernetes_cluster_id = azurerm_kubernetes_cluster.aks.id
  # Pool name is the map key truncated to 6 characters when node_os is "Windows",
  # otherwise to 12 characters.
  name                  = each.value.node_os == "Windows" ? substr(each.key, 0, 6) : substr(each.key, 0, 12)
  node_count            = each.value.node_count
  vm_size               = each.value.vm_size
  availability_zones    = each.value.zones
  max_pods              = each.value.max_pods
  os_disk_size_gb       = each.value.os_disk_size_gb
  os_type               = each.value.node_os
  # All additional pools share the same subnet variable as the default pool.
  vnet_subnet_id        = var.vnet_subnet_id
  node_taints           = each.value.taints
  enable_auto_scaling   = each.value.cluster_auto_scaling
  min_count             = each.value.cluster_auto_scaling_min_count
  max_count             = each.value.cluster_auto_scaling_max_count
}

# Kubernetes namespace named after var.namespace.
# No depends_on is declared here; the only visible link to the cluster is through
# the kubernetes provider configuration.
resource "kubernetes_namespace" "aks-namespace" {
  metadata {
    name = var.namespace
  }
}

data.tf

# Looks up the resource group named by var.resource_group_name; its location, name
# and tags are reused by the resources and locals of this module.
data "azurerm_resource_group" "rg" {
  name = var.resource_group_name
}

lookup.tf

# Module-wide derived values: the one-letter environment code used in resource
# names, and the common tag set.
locals {

  # Maps an environment name to the single-letter code embedded in resource names.
  # Several names share a code ("prod"/"prd" -> "p", "dev"/"poc" -> "d").
  environment_lookup = {
    dev  = "d"
    test = "t"
    int  = "i"
    prod = "p"
    prd  = "p"
    uat  = "a"
    poc  = "d"
    dr   = "r"
    lab  = "l"
  }

 
  # No default is passed to lookup(), so var.environment must be one of the keys above.
  lookup_result = lookup(local.environment_lookup, var.environment)

  # Tags of the looked-up resource group, overlaid with the fixed entries below.
  tags = merge(
    data.azurerm_resource_group.rg.tags, {
      Directory      = "tectcompany.com",
      PrivateDNSZone = var.private_dns_zone,
      Immutable      = "False",
      ManagedOS      = "True",
    }
  )
}

# Looks up an existing Log Analytics workspace by hard-coded name and resource group;
# its id is what the cluster's oms_agent add-on is wired to.
# NOTE(review): the override_log_analytics_* variables are not consulted here — confirm
# whether these literals were meant to be overridable.
data "azurerm_log_analytics_workspace" "log_analytics" {
  name                = "abc-az-lad2"
  resource_group_name = "abc-dev-aae"
}

variables.tf

# NOTE(review): secondary_region and the two override_log_analytics_* variables are
# not referenced by any resource shown with this module — confirm they are still needed.
variable "secondary_region" {
  description = "Is this resource being deployed into the secondary (pair) region?"

  default = false
  type    = bool
}

variable "override_log_analytics_workspace" {
  description = "Override the vm log analytics workspace"
  type        = string
  default     = null
}

variable "override_log_analytics_resource_group_name" {
  description = "Overrides the log analytics resource group name"
  type        = string
  default     = null
}

# Used as the key into local.environment_lookup to derive the one-letter
# environment code in resource names.
variable "environment" {
  description = "The name of environment for the AKS Cluster"
  type        = string
  default     = "dev"
}

# Last segment of the cluster and workspace names.
# NOTE(review): the type is number but the default is the string "001"; after
# conversion the leading zeros are presumably lost (the cluster name in the reported
# error reads "pqr-aks-d-1") — confirm whether type = string was intended.
variable "identifier" {
  description = "The identifier for the AKS Cluster"
  type        = number
  default     = "001"
}

# Cluster version, default node pool placement/labels and network-profile inputs.
# All of these are passed straight through to azurerm_kubernetes_cluster.aks.

# NOTE(review): the description says the default is the latest available version,
# but the default below is pinned to "1.19.9".
variable "kubernetes_version" {
  description = "Specify which Kubernetes release to use. The default used is the latest Kubernetes version available in the region"
  type        = string
  default     = "1.19.9"
}

variable "dns_prefix" {
  description = "The dns prefix for the AKS Cluster"
  type        = string
  default     = "odessa-sandpit"
}

variable "orchestrator_version" {
  description = "Specify which Kubernetes release to use for the orchestration layer. The default used is the latest Kubernetes version available in the region"
  type        = string
  default     = null
}

variable "agents_availability_zones" {
  description = "(Optional) A list of Availability Zones across which the Node Pool should be spread. Changing this forces a new resource to be created."
  type        = list(string)
  default     = null
}

variable "agents_size" {
  default     = "Standard_D4s_v3"
  description = "The default virtual machine size for the Kubernetes agents"
  type        = string
}

# Shared by the default node pool and every additional node pool.
variable "vnet_subnet_id" {
  description = "(Optional) The ID of a Subnet where the Kubernetes Node Pool should exist. Changing this forces a new resource to be created."
  type        = string
  default     = null
}

variable "agents_labels" {
  description = "(Optional) A map of Kubernetes labels which should be applied to nodes in the Default Node Pool. Changing this forces a new resource to be created."
  type        = map(string)
  default     = {}
}

# Merged on top of local.tags for the default node pool.
variable "agents_tags" {
  description = "(Optional) A mapping of tags to assign to the Node Pool."
  type        = map(string)
  default     = {}
}

variable "net_profile_dns_service_ip" {
  description = "(Optional) IP address within the Kubernetes service address range that will be used by cluster service discovery (kube-dns). Changing this forces a new resource to be created."
  type        = string
  default     = null
}

variable "net_profile_docker_bridge_cidr" {
  description = "(Optional) IP address (in CIDR notation) used as the Docker bridge IP address on nodes. Changing this forces a new resource to be created."
  type        = string
  default     = null
}

variable "net_profile_service_cidr" {
  description = "(Optional) The Network Range used by the Kubernetes service. Changing this forces a new resource to be created."
  type        = string
  default     = null
}

variable "rbac_aad_admin_group_object_ids" {
  description = "Object ID of groups with admin access."
  type        = list(string)
  default     = null
}

# Network plugin/policy, private-cluster switch and default node pool sizing.

variable "network_policy" {
  description = "(Optional) The Network Policy to be used by the network profile of Azure Kubernetes Cluster."
  type        = string
  default     = "azure"
}

variable "network_plugin" {
  description = "(Optional) The Network Plugin to be used by the network profile of Azure Kubernetes Cluster."
  type        = string
  default     = "azure"
}

# Feeds private_cluster_enabled on the cluster. No type constraint is declared, and
# the default is true, so a caller that does not set it gets a private cluster.
variable "enable_private_cluster" {
  description = "(Optional) Set this variable to true if you want Azure Kubernetes Cluster to be private."
  default     = true
}

variable "default_pool_node_count" {
  description = "(Optional) The initial node count for the default pool of AKS Cluster"
  type        = number
  default     = 3
}

variable "default_pool_max_node_count" {
  description = "(Optional) The max node count for the default pool of AKS Cluster"
  type        = number
  default     = 6
}

variable "default_pool_min_node_count" {
  description = "(Optional) The min node count for the default pool of AKS Cluster"
  type        = number
  default     = 3
}

# NOTE(review): the default of 13 appears in the reported
# InsufficientAgentPoolMaxPodsPerAgentPool error ("should be larger than 30");
# confirm the minimum accepted by AKS before relying on this default.
variable "default_pool_max_pod_count" {
  description = "(Optional) The max pod count for the default pool of AKS Cluster"
  type        = number
  default     = 13
}

# Declared as string here, whereas additional_node_pools declares os_disk_size_gb as number.
variable "default_pool_os_disk_size_gb" {
  description = "(Optional) The size of os disk in gb for the nodes from default pool of AKS Cluster"
  type        = string
  default     = "64"
}

# Map of extra node pools, one azurerm_kubernetes_cluster_node_pool per entry.
# The map key becomes the pool name (truncated by the resource). No default is
# declared, so callers must supply this value. Each field maps to the node pool
# argument noted beside it.
variable "additional_node_pools" {
  type = map(object({
    node_count                     = number       # -> node_count
    max_pods                       = number       # -> max_pods
    os_disk_size_gb                = number       # -> os_disk_size_gb
    vm_size                        = string       # -> vm_size
    zones                          = list(string) # -> availability_zones
    node_os                        = string       # -> os_type; "Windows" shortens the pool name to 6 chars
    taints                         = list(string) # -> node_taints
    cluster_auto_scaling           = bool         # -> enable_auto_scaling
    cluster_auto_scaling_min_count = number       # -> min_count
    cluster_auto_scaling_max_count = number       # -> max_count
  }))
}
# SKU tier of the cluster; validation restricts it to "Free" or "Paid".
variable "sku_tier" {
  description = "(Optional)The SKU Tier that should be used for this Kubernetes Cluster, possible values Free or Paid"
  type        = string
  default     = "Paid"

  validation {
    condition     = contains(["Free", "Paid"], var.sku_tier)
    error_message = "SKU_TIER can only be either Paid or Free."
  }

}

# SKU of azurerm_log_analytics_workspace.law; validation restricts it to the list below.
variable "la_sku" {

  description = "(Optional)The SKU Tier that should be used for Log Analytics. Multiple values are possible."
  type        = string
  default     = "PerGB2018"

  validation {
    condition     = contains(["Free", "PerNode", "Premium", "Standard", "Standalone", "Unlimited", "CapacityReservation", "PerGB2018"], var.la_sku)
    error_message = "SKU_TIER for Log Analytics can be can only be either of Free, PerNode, Premium, Standard, Standalone, Unlimited, CapacityReservation and PerGB2018(Default Value)."
  }

}

# Name of the existing resource group looked up by data.azurerm_resource_group.rg.
# No default, so it is required.
variable "resource_group_name" {
  description = "Resource Group for deploying AKS Cluster"
  type = string
}

# NOTE(review): despite the description, the only visible use of this value is the
# "PrivateDNSZone" tag in local.tags; the cluster's dns_prefix comes from var.dns_prefix.
variable "private_dns_zone" {
  description = "DNS prefix for AKS Cluster"
  type = string
  default = "testcluster"
}

# Prefix of the cluster and workspace names.
# The default "" has length 0 and therefore does not satisfy the length == 3
# validation, so callers effectively have to supply a value.
variable "tla" {
  description = "Three Level acronym - three letter abbreviation for application"
  type = string
  default = ""
  validation {
    condition     = length(var.tla) == 3
    error_message = "The TLA should be precisely three characters."
  }
}

# Name given to kubernetes_namespace.aks-namespace. No default, so it is required.
variable "namespace"{
 description = "AKS Namespace"
  type = string
}

最后,我将调用下面的模块来创建AKS集群、LA和AKS集群的命名空间:

# Root-module azurerm provider with an empty features block; the version pin is
# commented out.
provider "azurerm" {
   features {}
   #version = "~> 2.53.0"
}
# Instantiates the AKS module from two directories up, with one additional node pool
# ("pool1") and the namespace "sample-ns". Variables not set here, including
# enable_private_cluster, keep the module's defaults.
module "aks-cluster1" {

  source = "../../"
  resource_group_name = "pst-aks-sandpit-dev-1"
  tla = "pqr"
  # Numeric fields are written as strings here although the module declares them as number.
  # NOTE(review): node_count "1" is below cluster_auto_scaling_min_count "2", and a
  # "windows" taint is applied to a pool whose node_os is "Linux" — confirm both are intended.
  additional_node_pools = { 
        pool1 = {
            node_count                      = "1"
            max_pods                       = "110"
            os_disk_size_gb                = "30"
            vm_size                        = "Standard_D8s_v3"
            zones                          = ["1","2","3"]
            node_os                        = "Linux"
            taints                         =  ["kubernetes.io/os=windows:NoSchedule"]
            cluster_auto_scaling           = true
            cluster_auto_scaling_min_count = "2"
            cluster_auto_scaling_max_count = "4"
        } 
                            }
  namespace = "sample-ns"
}

**问题:**当terraform尝试创建集群时,我收到一个错误,提示没有这样的主机。

我认为它不能连接到集群,但我可能错了。我不知道它内部是如何处理的。

Error: Post "https://testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io:443/api/v1/namespaces": dial tcp: lookup testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io: no such host
iyr7buue

iyr7buue1#

我是Terraform Kubernetes提供程序的维护者之一,我经常看到这个特定的问题。作为一个曾经的DevOps从业者,我对大家在这个领域不断遇到的困难深有同感。如果可能的话,我真的很想在提供程序中修复这个问题。
您所面临的问题是Terraform核心在传递未知值到提供者配置块时的限制。

You can use expressions in the values of these configuration arguments, 
but can only reference values that are known before the configuration is applied.

当您对底层基础架构(例如本例中的AKS集群)进行更改时,您将向Kubernetes提供程序配置块传递一个未知值,因为在将更改应用于AKS集群之前,集群基础架构的完整范围是未知的。
虽然我写了最初的指南来说明可以绕过其中的一些问题,但正如您从经验中发现的,要让Kubernetes提供程序与底层基础设施协同工作,存在许多边缘情况,使其成为一个不可靠且不直观的过程。不过,我们确实计划通过预先给出更好的错误信息(better error messages upfront)来消除一些不稳定因素,这样在这种情况下就可以省去一些麻烦。
为了解决这类问题,集群基础设施需要保持与Kubernetes和Helm提供者资源分离的状态。我这里有一个例子,它在一个apply中构建了一个AKS集群,然后在第二个apply中管理Kubernetes/Helm资源。您可以使用这种方法为您的特定用例构建最健壮的配置:
https://github.com/hashicorp/terraform-provider-kubernetes/tree/e058e225e621f06e393bcb6407e7737fd43817bd/_examples/aks
我知道这种双应用方法不方便,这就是为什么我们继续尝试在单应用场景以及包含处于相同Terraform状态的Kubernetes和集群资源的场景中容纳用户。但是,在上游Terraform能够添加对此的支持之前,单应用工作流仍将存在缺陷,并且不如将集群基础设施与Kubernetes资源分离的可靠性高。
大多数情况下可以使用depends_on解决(以确保在创建Kubernetes资源之前创建集群),或者将集群基础架构移到一个单独的模块中,并运行 `terraform state rm module.kubernetes-config` 或 `terraform apply -target=module.aks-cluster`。但我认为鼓励这种解决方案从长远来看会导致更多的麻烦,因为它让用户负责确定何时使用特殊的一次性apply命令,而不是从一开始就将Terraform设置为可靠且可预测的行为。另外,它可能会产生意想不到的副作用,如孤立的云资源。

m2xkgtsf

m2xkgtsf2#

谢谢你提供的额外细节。我在这里看到了几个问题。第一个问题是你眼前问题的核心:

# Quoted from the question's variable declarations: no type constraint is declared,
# and the default is true.
variable "enable_private_cluster" {
  description = "(Optional) Set this variable to true if you want Azure Kubernetes Cluster to be private."
  default     = true
}

您的群集部署在此处采用默认值,因此您的API端点是区域privatelink.australiaeast.azmk8s.io中的一个专用DNS条目:

Post "https://testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io:443/api/v1/namespaces"

terraform kubernetes提供程序必须能够访问API端点才能部署名称空间。但是,它无法解析域。要使此操作正常工作,您需要确保:

  1. Azure中存在专用DNS区域
  2. 专用DNS区域已链接到相关的虚拟网络,包括运行Terraform的主机所在的网络
  3. Terraform主机上的DNS解析器可以通过 https://learn.microsoft.com/en-us/azure/virtual-network/what-is-ip-address-168-63-129-16 中定义的端点解析privatelink域——请注意,如果您的网络使用本地(on-premises)内部DNS,则可能需要为该私有域配置转发
  4. 您必须确保您的Terraform主机可以在TCP端口443上访问群集部署的privatelink端点

Azure privatelink和私有DNS的正确配置并不简单,尤其是在复杂的网络环境中。因此,您可能会遇到我在这里没有介绍的其他障碍。
或者,您可能希望通过将此模块选项设置为false,在不使用privatelink的情况下部署此群集。出于安全性和合规性原因,这可能并不可取,因此请确保您了解自己在做什么:
enable_private_cluster = false

我遇到的下一个问题是:

Error: creating Managed Kubernetes Cluster "pqr-aks-d-1" (Resource Group "pst-aks-sandpit-dev-1"): containerservice.ManagedClustersClient#CreateOrUpdate: Failure sending request: StatusCode=0 -- Original Error: Code="InsufficientAgentPoolMaxPodsPerAgentPool" Message="The AgentPoolProfile 'syspool001' has an invalid total maxPods(maxPods per node * node count), the total maxPods(13 * 824668498368) should be larger than 30. Please refer to aka.ms/aks-min-max-pod for more detail." Target="agentPoolProfile.kubernetesConfig.kubeletConfig.maxPods"

我通过设置以下内容克服了这一问题:

default_pool_max_pod_count = 30

最后一个问题是,您需要配置kubernetes提供者,使其具有足够的权限来部署名称空间:

│ Error: Unauthorized
│
│   with module.aks-cluster1.kubernetes_namespace.aks-namespace,
│   on ../../main.tf line 103, in resource "kubernetes_namespace" "aks-namespace":
│  103: resource "kubernetes_namespace" "aks-namespace" {

实现此目的的一种方法是使用kube_admin_config而不是kube_config:

# Same provider wiring as in the question, but every attribute is read from
# kube_admin_config instead of kube_config.
provider "kubernetes" {
  #load_config_file = "false"
  host                   = azurerm_kubernetes_cluster.aks.kube_admin_config.0.host
  username               = azurerm_kubernetes_cluster.aks.kube_admin_config.0.username
  password               = azurerm_kubernetes_cluster.aks.kube_admin_config.0.password
  # The certificate/key attributes are passed through base64decode() before use.
  client_certificate     = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.client_certificate)
  client_key             = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.client_key)
  cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.cluster_ca_certificate)
}
u1ehiz5o

u1ehiz5o3#

很难说出问题是什么,因为您发布的代码不完整。首先,您不应该这样做:

# Provider configuration that takes its connection settings from the kubeconfig file
# at ~/.kube/config rather than from the cluster resource.
provider "kubernetes" {
  config_path    = "~/.kube/config"
}

你发布的AKS URL并不存在,所以我认为它取的是你本地kube配置中某个旧集群的默认值。

f3temu5u

f3temu5u4#

我们应该使用数据源获取集群的详细信息,并用它来配置Kubernetes提供程序;对于非私有集群,Kubernetes API可以直接访问。
步骤1:数据源

# Looks up an already existing AKS cluster by name and resource group.
# NOTE(review): this references azurerm_resource_group.rg as a managed resource; in the
# question's module the resource group is a data source (data.azurerm_resource_group.rg)
# — adjust the reference to whichever is declared.
data "azurerm_kubernetes_cluster" "example" {
  name                = var.cluster_name
  resource_group_name = azurerm_resource_group.rg.name
}

步骤2:提供商

# Configures the Kubernetes provider from the first kube_config entry of the
# data.azurerm_kubernetes_cluster.example data source instead of the cluster resource.
provider "kubernetes" {
  host                   = data.azurerm_kubernetes_cluster.example.kube_config.0.host
  username               = data.azurerm_kubernetes_cluster.example.kube_config.0.username
  password               = data.azurerm_kubernetes_cluster.example.kube_config.0.password
  # The certificate/key attributes are passed through base64decode() before use.
  client_certificate     = base64decode(data.azurerm_kubernetes_cluster.example.kube_config.0.client_certificate)
  client_key             = base64decode(data.azurerm_kubernetes_cluster.example.kube_config.0.client_key)
  cluster_ca_certificate = base64decode(data.azurerm_kubernetes_cluster.example.kube_config.0.cluster_ca_certificate)
}

相关问题