Commit 1270e2cb authored by Spiros Koulouzis

Added retry to deployer. The VM we use times out all the time.

parent 144e6752
......@@ -17,31 +17,20 @@ package nl.uva.sne.drip.api.v1.rest;
import com.webcohesion.enunciate.metadata.rs.ResponseCode;
import com.webcohesion.enunciate.metadata.rs.StatusCodes;
import nl.uva.sne.drip.drip.commons.data.v1.external.ToscaRepresentation;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeoutException;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.logging.Logger;
import javax.annotation.security.RolesAllowed;
import org.json.JSONException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.bind.annotation.PathVariable;
import nl.uva.sne.drip.api.exception.BadRequestException;
import nl.uva.sne.drip.api.service.DRIPLogService;
import nl.uva.sne.drip.api.service.ToscaService;
import nl.uva.sne.drip.api.service.UserService;
import nl.uva.sne.drip.drip.commons.data.v1.external.DRIPLogRecord;
import org.springframework.web.bind.annotation.RequestBody;
/**
* This controller is responsible for storing TOSCA descriptions that can be
......@@ -60,7 +49,6 @@ public class LogController {
@Autowired
private DRIPLogService logService;
@RequestMapping(method = RequestMethod.GET)
@RolesAllowed({UserService.USER, UserService.ADMIN})
public @ResponseBody
DRIPLogRecord get() {
......
......@@ -25,7 +25,6 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.bind.annotation.PathVariable;
......
......@@ -37,6 +37,9 @@ if not getattr(logger, 'handler_set', None):
logger.handler_set = True
retry=0
def get_resp_line(line):
    """Return `line` as a JSON string literal after escaping it.

    The text is encoded to UTF-8, leading and trailing newline characters
    are stripped, and the result is passed through the 'string_escape'
    codec before being serialised with json.dumps.

    NOTE(review): 'string_escape' is a Python 2 codec, so this helper
    presumably targets a Python 2 interpreter -- confirm before porting.
    """
    utf8_text = line.encode('utf-8')
    # Only '\n' is stripped; other surrounding whitespace is kept.
    without_newlines = utf8_text.strip('\n')
    escaped = without_newlines.encode('string_escape')
    return json.dumps(escaped)
......@@ -124,6 +127,7 @@ def docker_check(vm, compose_name):
logger.info("Finished docker info services on: "+vm.ip)
except Exception as e:
global retry
exc_type, exc_obj, tb = sys.exc_info()
f = tb.tb_frame
lineno = tb.tb_lineno
......
......@@ -33,6 +33,8 @@ if not getattr(logger, 'handler_set', None):
logger.handler_set = True
retry=0
def deploy_compose(vm, compose_file, compose_name):
try:
logger.info("Starting docker compose deployment on: "+vm.ip)
......@@ -47,9 +49,14 @@ def deploy_compose(vm, compose_file, compose_name):
stdout.read()
logger.info("Finished docker compose deployment on: "+vm.ip)
except Exception as e:
global retry
if 'Connection timed out' in str(e) and retry < 3:
retry+=1
deploy_compose(vm)
logger.error(vm.ip + " " + str(e))
return "ERROR:" + vm.ip + " " + str(e)
ssh.close()
retry=0
return "SUCCESS"
......
......@@ -32,6 +32,7 @@ if not getattr(logger, 'handler_set', None):
logger.handler_set = True
retry=0
def install_engine(vm):
try:
......@@ -54,9 +55,16 @@ def install_engine(vm):
stdout.read()
logger.info("Finised docker engine installation on: "+(vm.ip))
except Exception as e:
global retry
if 'Connection timed out' in str(e) and retry < 3:
logger.warning(vm.ip + " " + str(e)+". Retiring")
retry+=1
install_engine(vm)
logger.error(vm.ip + " " + str(e))
return "ERROR:"+vm.ip+" "+str(e)
ssh.close()
retry=0
return "SUCCESS"
def run(vm_list,rabbitmq_host,owner):
......
......@@ -20,6 +20,21 @@ import paramiko, os
from vm_info import VmInfo
import linecache
import sys
import logging
from drip_logging.drip_logging_handler import *
logger = logging.getLogger(__name__)
if not getattr(logger, 'handler_set', None):
logger.setLevel(logging.INFO)
h = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
h.setFormatter(formatter)
logger.addHandler(h)
logger.handler_set = True
retry=0
def PrintException():
exc_type, exc_obj, tb = sys.exc_info()
......@@ -34,7 +49,7 @@ def PrintException():
def install_manager(vm):
try:
print "%s: ====== Start Kubernetes Master Installing ======" % (vm.ip)
logger.info("Starting kubernetes master installation on: "+(vm.ip))
parentDir = os.path.dirname(os.path.abspath(vm.key))
os.chmod(parentDir, 0o700)
os.chmod(vm.key, 0o600)
......@@ -65,9 +80,11 @@ def install_manager(vm):
stdin, stdout, stderr = ssh.exec_command("sudo chgrp %s /tmp/admin.conf" % (vm.user))
stdout.read()
sftp.get("/tmp/admin.conf", file_path+"/admin.conf")
print "%s: ========= Kubernetes Master Installed =========" % (vm.ip)
logger.info("Finished kubernetes master installation on: "+(vm.ip))
except Exception as e:
print '%s: %s' % (vm.ip, e)
global retry
#print '%s: %s' % (vm.ip, e)
logger.error(vm.ip + " " + str(e))
PrintException()
return "ERROR:"+vm.ip+" "+str(e)
ssh.close()
......@@ -75,7 +92,7 @@ def install_manager(vm):
def install_worker(join_cmd, vm):
try:
print "%s: ====== Start Kubernetes Slave Installing ======" % (vm.ip)
logger.info("Starting kubernetes slave installation on: "+(vm.ip))
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(vm.ip, username=vm.user, key_filename=vm.key)
......@@ -97,14 +114,17 @@ def install_worker(join_cmd, vm):
stdout.read()
stdin, stdout, stderr = ssh.exec_command("sudo %s" % (join_cmd))
stdout.read()
print "%s: ========= Kubernetes Slave Installed =========" % (vm.ip)
logger.info("Finished kubernetes slave installation on: "+(vm.ip))
except Exception as e:
print '%s: %s' % (vm.ip, e)
#print '%s: %s' % (vm.ip, e)
logger.error(vm.ip + " " + str(e))
return "ERROR:"+vm.ip+" "+str(e)
ssh.close()
return "SUCCESS"
def run(vm_list):
def run(vm_list,rabbitmq_host,owner):
rabbit = DRIPLoggingHandler(host=rabbitmq_host, port=5672,user=owner)
logger.addHandler(rabbit)
for i in vm_list:
if i.role == "master":
join_cmd = install_manager(i)
......
......@@ -31,6 +31,8 @@ if not getattr(logger, 'handler_set', None):
logger.handler_set = True
retry=0
def scale_service(vm, application_name, service_name, service_num):
try:
logger.info("Starting docker service scaling on: "+vm.ip)
......@@ -42,6 +44,7 @@ def scale_service(vm, application_name, service_name, service_num):
stdout.read()
logger.info("Finished docker service scaling on: "+vm.ip)
except Exception as e:
global retry
logger.error(vm.ip + " " + str(e))
return "ERROR:" + vm.ip + " " + str(e)
ssh.close()
......
......@@ -33,6 +33,8 @@ if not getattr(logger, 'handler_set', None):
logger.handler_set = True
retry=0
def install_manager(vm):
try:
logger.info("Starting swarm manager installation on: "+(vm.ip))
......@@ -58,9 +60,14 @@ def install_manager(vm):
ret = retstr[2].encode()
logger.info("Finished swarm manager installation on: "+(vm.ip))
except Exception as e:
global retry
if 'Connection timed out' in str(e) and retry < 3:
retry+=1
install_manager(vm)
logger.error(vm.ip + " " + str(e))
return "ERROR:" + vm.ip + " " + str(e)
ssh.close()
retry=0
return ret
def install_worker(join_cmd, vm):
......@@ -79,9 +86,13 @@ def install_worker(join_cmd, vm):
stdout.read()
logger.info("Finished swarm worker installation on: "+(vm.ip))
except Exception as e:
if 'Connection timed out' in str(e) and retry < 3:
retry+=1
install_worker(vm)
logger.error(vm.ip + " " + str(e))
return "ERROR:" + vm.ip + " " + str(e)
ssh.close()
retry=0
return "SUCCESS"
def run(vm_list,rabbitmq_host,owner):
......
......@@ -110,7 +110,7 @@ def handleDelivery(message):
if manager_type == "kubernetes":
ret = docker_kubernetes.run(vm_list)
ret = docker_kubernetes.run(vm_list,rabbitmq_host,owner)
return ret
elif manager_type == "swarm":
ret = docker_engine.run(vm_list,rabbitmq_host,owner)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment