
I'm trying to deploy Airflow on a Google Cloud Compute Engine instance.

There are specific values in the initialization action (the shell script that initializes the VM) that need to change for each deployment. I was wondering if I could handle this with Terraform.

Here is my Terraform configuration:

provider "google" {
  region      = "${var.region}"
  project     = "${var.project_name}"
  credentials = "${file("${var.credentials_file_path}")}"
  zone        = "${var.region_zone}"
}

resource "google_sql_database_instance" "master" {
  name = "${var.db_instance}"
  region = "${var.region}"
  settings {
    tier = "db-n1-standard-1"
  }
}


resource "google_sql_user" "users" {
  name     = "${var.db_user}"
  instance = "${google_sql_database_instance.master.name}"
  host     = "%"
  password = "${var.db_password}"
  depends_on = ["google_sql_database_instance.master"]
}


resource "google_sql_database" "airflow" {
  name      = "${var.db_name}"
  instance  = "${google_sql_database_instance.master.name}"
  charset   = "utf8"
  collation = "utf8_general_ci"
  depends_on = ["google_sql_database_instance.master"]
}

resource "google_compute_instance" "default" {
  name         = "${var.machine_name}"
  machine_type = "${var.machine_type}"
  tags         = ["http-tag", "https-tag", "http-server", "https-server"]

  boot_disk {
    initialize_params {
      image = "projects/debian-cloud/global/images/family/debian-8"
    }
  }

  network_interface {
    network = "default"

    access_config {
      // Ephemeral IP
    }
  }
//----------------------------My Custom Script-------------------------
  metadata_startup_script = "${file("scripts/airflow-instance.sh")}"
//---------------------------------------------------------------------
  service_account {
    email = "*****@project-name.iam.gserviceaccount.com"
    scopes = ["https://www.googleapis.com/auth/cloud-platform"]
  }
   depends_on = ["google_sql_database_instance.master",
"google_sql_user.users","google_sql_database.airflow" ]
}

resource "google_compute_firewall" "default" {
  name    = "terraform-airflow-firewall"
  network = "default"

  allow {
    protocol = "tcp"
    ports    = ["80", "8080"]
  }

  target_tags = ["https-server", "http-server"]
}
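
For reference, this configuration assumes input variable declarations along these lines (a minimal sketch; only the variable names are taken from the references above, values would come from a tfvars file):

variable "region" {}
variable "region_zone" {}
variable "project_name" {}
variable "credentials_file_path" {}
variable "db_instance" {}
variable "db_user" {}
variable "db_password" {}
variable "db_name" {}
variable "machine_name" {}
variable "machine_type" {}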

The shell script is as follows. The values that need to change dynamically are marked with **value**:

#!/bin/bash
# Download the proxy and make it executable. 
sudo mkdir /opt/cloud_sql_proxy
cd /opt/cloud_sql_proxy
sudo wget https://dl.google.com/cloudsql/cloud_sql_proxy.linux.amd64 -O cloud_sql_proxy
sudo chmod +x /opt/cloud_sql_proxy/cloud_sql_proxy

# Start the Cloud SQL proxy, specifying the database instance to connect to.
# The instance connection name has the form PROJECT:REGION:INSTANCE; it can be found in the instance properties on the GCP console.
nohup ./cloud_sql_proxy -instances=**PROJECT_NAME**:us-east1:**CLOUD_SQL_INSTANCE_NAME**=tcp:3306 &
# Install prerequisites.
sudo apt-get update && sudo apt-get install -y \
    python3-pip \
    python3-dev \
    build-essential \
    libssl-dev \
    libffi-dev \
    libmysqlclient-dev

# Upgrade pip.
sudo easy_install3 -U pip

# Install the MySQL client library for Python.
sudo pip3 install mysqlclient

# Install a missed dependency
sudo pip3 install --ignore-installed six

# Install Airflow with the extra package gcp_api containing the hooks and operators for the GCP services.
sudo pip3 install apache-airflow[gcp_api]
sudo pip3 install oauth2client
sudo pip3 install google-api-python-client

# Create AIRFLOW_HOME directory.
export AIRFLOW_HOME=/airflow
sudo mkdir $AIRFLOW_HOME
sudo mkdir $AIRFLOW_HOME/dags
sudo chmod 777 $AIRFLOW_HOME
sudo chmod 777 $AIRFLOW_HOME/dags

cd $AIRFLOW_HOME 

# Run Airflow once so it generates the airflow.cfg configuration file, then edit it below.
airflow version

# Update airflow.cfg with our configuration.
sed -i 's/executor = SequentialExecutor/executor = LocalExecutor/g' airflow.cfg
sed -i 's/load_examples = True/load_examples = False/g' airflow.cfg
sed -i 's/sql_alchemy_conn = sqlite:\/\/\/\/airflow\/airflow.db/sql_alchemy_conn = mysql:\/\/**USER:PASSWORD**@127.0.0.1:3306\/**DB_NAME**/g' airflow.cfg

airflow initdb

nohup airflow webserver -p 8080 &
nohup airflow scheduler &

2 Answers


You could try the template_file data source. It renders a template file with supplied variable values. The whole thing might then look something like this:

variable "project_name" { type = "string" }

data "template_file" "airflow_instance" {
  template = "${file("${path.module}/scripts/airflow-instance.sh")}"

  vars {
    PROJECT_NAME = "${var.project_name}"
    ...
  }
}

resource "google_compute_instance" "default" {

  ...

  metadata_startup_script = "${data.template_file.airflow_instance.rendered}"

  ...
}
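
Note that template_file interpolates ${...} sequences, so the **value** markers in the shell script have to be rewritten in that syntax. A sketch of the affected lines, where the variable names are illustrative and simply mirror the markers in the question:

# In scripts/airflow-instance.sh, rewrite the markers as template interpolations:
nohup ./cloud_sql_proxy -instances=${PROJECT_NAME}:us-east1:${CLOUD_SQL_INSTANCE_NAME}=tcp:3306 &

sed -i 's/sql_alchemy_conn = sqlite:\/\/\/\/airflow\/airflow.db/sql_alchemy_conn = mysql:\/\/${DB_USER}:${DB_PASSWORD}@127.0.0.1:3306\/${DB_NAME}/g' airflow.cfg

One caveat: the renderer leaves plain $AIRFLOW_HOME references alone, but any literal ${...} that the shell itself should see must be escaped as $${...}, otherwise the render fails on an undefined template variable.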



This question is a little old, but the existing answer is more verbose than necessary: terraform console accepts expressions from stdin.

That means you can evaluate a variable like this:

$ echo var.name | terraform console
"some_variable_name"

