Commit 1270e2cb authored by Spiros Koulouzis

Added retry to the deployer. The VM we use times out all the time.

parent 144e6752
...@@ -17,31 +17,20 @@ package nl.uva.sne.drip.api.v1.rest; ...@@ -17,31 +17,20 @@ package nl.uva.sne.drip.api.v1.rest;
import com.webcohesion.enunciate.metadata.rs.ResponseCode; import com.webcohesion.enunciate.metadata.rs.ResponseCode;
import com.webcohesion.enunciate.metadata.rs.StatusCodes; import com.webcohesion.enunciate.metadata.rs.StatusCodes;
import nl.uva.sne.drip.drip.commons.data.v1.external.ToscaRepresentation;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeoutException; import java.util.concurrent.TimeoutException;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.logging.Logger; import java.util.logging.Logger;
import javax.annotation.security.RolesAllowed; import javax.annotation.security.RolesAllowed;
import org.json.JSONException;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController; import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.bind.annotation.PathVariable;
import nl.uva.sne.drip.api.exception.BadRequestException;
import nl.uva.sne.drip.api.service.DRIPLogService; import nl.uva.sne.drip.api.service.DRIPLogService;
import nl.uva.sne.drip.api.service.ToscaService;
import nl.uva.sne.drip.api.service.UserService; import nl.uva.sne.drip.api.service.UserService;
import nl.uva.sne.drip.drip.commons.data.v1.external.DRIPLogRecord; import nl.uva.sne.drip.drip.commons.data.v1.external.DRIPLogRecord;
import org.springframework.web.bind.annotation.RequestBody;
/** /**
* This controller is responsible for storing TOSCA descriptions that can be * This controller is responsible for storing TOSCA descriptions that can be
...@@ -60,7 +49,6 @@ public class LogController { ...@@ -60,7 +49,6 @@ public class LogController {
@Autowired @Autowired
private DRIPLogService logService; private DRIPLogService logService;
@RequestMapping(method = RequestMethod.GET)
@RolesAllowed({UserService.USER, UserService.ADMIN}) @RolesAllowed({UserService.USER, UserService.ADMIN})
public @ResponseBody public @ResponseBody
DRIPLogRecord get() { DRIPLogRecord get() {
......
...@@ -25,7 +25,6 @@ import org.springframework.beans.factory.annotation.Autowired; ...@@ -25,7 +25,6 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController; import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PathVariable;
......
...@@ -35,6 +35,9 @@ if not getattr(logger, 'handler_set', None): ...@@ -35,6 +35,9 @@ if not getattr(logger, 'handler_set', None):
h.setFormatter(formatter) h.setFormatter(formatter)
logger.addHandler(h) logger.addHandler(h)
logger.handler_set = True logger.handler_set = True
retry=0
def get_resp_line(line): def get_resp_line(line):
...@@ -124,6 +127,7 @@ def docker_check(vm, compose_name): ...@@ -124,6 +127,7 @@ def docker_check(vm, compose_name):
logger.info("Finished docker info services on: "+vm.ip) logger.info("Finished docker info services on: "+vm.ip)
except Exception as e: except Exception as e:
global retry
exc_type, exc_obj, tb = sys.exc_info() exc_type, exc_obj, tb = sys.exc_info()
f = tb.tb_frame f = tb.tb_frame
lineno = tb.tb_lineno lineno = tb.tb_lineno
......
...@@ -33,6 +33,8 @@ if not getattr(logger, 'handler_set', None): ...@@ -33,6 +33,8 @@ if not getattr(logger, 'handler_set', None):
logger.handler_set = True logger.handler_set = True
retry=0
def deploy_compose(vm, compose_file, compose_name): def deploy_compose(vm, compose_file, compose_name):
try: try:
logger.info("Starting docker compose deployment on: "+vm.ip) logger.info("Starting docker compose deployment on: "+vm.ip)
...@@ -47,9 +49,14 @@ def deploy_compose(vm, compose_file, compose_name): ...@@ -47,9 +49,14 @@ def deploy_compose(vm, compose_file, compose_name):
stdout.read() stdout.read()
logger.info("Finished docker compose deployment on: "+vm.ip) logger.info("Finished docker compose deployment on: "+vm.ip)
except Exception as e: except Exception as e:
global retry
if 'Connection timed out' in str(e) and retry < 3:
retry+=1
deploy_compose(vm)
logger.error(vm.ip + " " + str(e)) logger.error(vm.ip + " " + str(e))
return "ERROR:" + vm.ip + " " + str(e) return "ERROR:" + vm.ip + " " + str(e)
ssh.close() ssh.close()
retry=0
return "SUCCESS" return "SUCCESS"
......
...@@ -32,6 +32,7 @@ if not getattr(logger, 'handler_set', None): ...@@ -32,6 +32,7 @@ if not getattr(logger, 'handler_set', None):
logger.handler_set = True logger.handler_set = True
retry=0
def install_engine(vm): def install_engine(vm):
try: try:
...@@ -54,9 +55,16 @@ def install_engine(vm): ...@@ -54,9 +55,16 @@ def install_engine(vm):
stdout.read() stdout.read()
logger.info("Finised docker engine installation on: "+(vm.ip)) logger.info("Finised docker engine installation on: "+(vm.ip))
except Exception as e: except Exception as e:
global retry
if 'Connection timed out' in str(e) and retry < 3:
logger.warning(vm.ip + " " + str(e)+". Retiring")
retry+=1
install_engine(vm)
logger.error(vm.ip + " " + str(e)) logger.error(vm.ip + " " + str(e))
return "ERROR:"+vm.ip+" "+str(e) return "ERROR:"+vm.ip+" "+str(e)
ssh.close() ssh.close()
retry=0
return "SUCCESS" return "SUCCESS"
def run(vm_list,rabbitmq_host,owner): def run(vm_list,rabbitmq_host,owner):
......
...@@ -20,6 +20,21 @@ import paramiko, os ...@@ -20,6 +20,21 @@ import paramiko, os
from vm_info import VmInfo from vm_info import VmInfo
import linecache import linecache
import sys import sys
import logging
from drip_logging.drip_logging_handler import *
logger = logging.getLogger(__name__)
if not getattr(logger, 'handler_set', None):
logger.setLevel(logging.INFO)
h = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
h.setFormatter(formatter)
logger.addHandler(h)
logger.handler_set = True
retry=0
def PrintException(): def PrintException():
exc_type, exc_obj, tb = sys.exc_info() exc_type, exc_obj, tb = sys.exc_info()
...@@ -34,7 +49,7 @@ def PrintException(): ...@@ -34,7 +49,7 @@ def PrintException():
def install_manager(vm): def install_manager(vm):
try: try:
print "%s: ====== Start Kubernetes Master Installing ======" % (vm.ip) logger.info("Starting kubernetes master installation on: "+(vm.ip))
parentDir = os.path.dirname(os.path.abspath(vm.key)) parentDir = os.path.dirname(os.path.abspath(vm.key))
os.chmod(parentDir, 0o700) os.chmod(parentDir, 0o700)
os.chmod(vm.key, 0o600) os.chmod(vm.key, 0o600)
...@@ -65,9 +80,11 @@ def install_manager(vm): ...@@ -65,9 +80,11 @@ def install_manager(vm):
stdin, stdout, stderr = ssh.exec_command("sudo chgrp %s /tmp/admin.conf" % (vm.user)) stdin, stdout, stderr = ssh.exec_command("sudo chgrp %s /tmp/admin.conf" % (vm.user))
stdout.read() stdout.read()
sftp.get("/tmp/admin.conf", file_path+"/admin.conf") sftp.get("/tmp/admin.conf", file_path+"/admin.conf")
print "%s: ========= Kubernetes Master Installed =========" % (vm.ip) logger.info("Finished kubernetes master installation on: "+(vm.ip))
except Exception as e: except Exception as e:
print '%s: %s' % (vm.ip, e) global retry
#print '%s: %s' % (vm.ip, e)
logger.error(vm.ip + " " + str(e))
PrintException() PrintException()
return "ERROR:"+vm.ip+" "+str(e) return "ERROR:"+vm.ip+" "+str(e)
ssh.close() ssh.close()
...@@ -75,7 +92,7 @@ def install_manager(vm): ...@@ -75,7 +92,7 @@ def install_manager(vm):
def install_worker(join_cmd, vm): def install_worker(join_cmd, vm):
try: try:
print "%s: ====== Start Kubernetes Slave Installing ======" % (vm.ip) logger.info("Starting kubernetes slave installation on: "+(vm.ip))
ssh = paramiko.SSHClient() ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(vm.ip, username=vm.user, key_filename=vm.key) ssh.connect(vm.ip, username=vm.user, key_filename=vm.key)
...@@ -97,14 +114,17 @@ def install_worker(join_cmd, vm): ...@@ -97,14 +114,17 @@ def install_worker(join_cmd, vm):
stdout.read() stdout.read()
stdin, stdout, stderr = ssh.exec_command("sudo %s" % (join_cmd)) stdin, stdout, stderr = ssh.exec_command("sudo %s" % (join_cmd))
stdout.read() stdout.read()
print "%s: ========= Kubernetes Slave Installed =========" % (vm.ip) logger.info("Finished kubernetes slave installation on: "+(vm.ip))
except Exception as e: except Exception as e:
print '%s: %s' % (vm.ip, e) #print '%s: %s' % (vm.ip, e)
logger.error(vm.ip + " " + str(e))
return "ERROR:"+vm.ip+" "+str(e) return "ERROR:"+vm.ip+" "+str(e)
ssh.close() ssh.close()
return "SUCCESS" return "SUCCESS"
def run(vm_list): def run(vm_list,rabbitmq_host,owner):
rabbit = DRIPLoggingHandler(host=rabbitmq_host, port=5672,user=owner)
logger.addHandler(rabbit)
for i in vm_list: for i in vm_list:
if i.role == "master": if i.role == "master":
join_cmd = install_manager(i) join_cmd = install_manager(i)
......
...@@ -31,6 +31,8 @@ if not getattr(logger, 'handler_set', None): ...@@ -31,6 +31,8 @@ if not getattr(logger, 'handler_set', None):
logger.handler_set = True logger.handler_set = True
retry=0
def scale_service(vm, application_name, service_name, service_num): def scale_service(vm, application_name, service_name, service_num):
try: try:
logger.info("Starting docker service scaling on: "+vm.ip) logger.info("Starting docker service scaling on: "+vm.ip)
...@@ -42,6 +44,7 @@ def scale_service(vm, application_name, service_name, service_num): ...@@ -42,6 +44,7 @@ def scale_service(vm, application_name, service_name, service_num):
stdout.read() stdout.read()
logger.info("Finished docker service scaling on: "+vm.ip) logger.info("Finished docker service scaling on: "+vm.ip)
except Exception as e: except Exception as e:
global retry
logger.error(vm.ip + " " + str(e)) logger.error(vm.ip + " " + str(e))
return "ERROR:" + vm.ip + " " + str(e) return "ERROR:" + vm.ip + " " + str(e)
ssh.close() ssh.close()
......
...@@ -33,6 +33,8 @@ if not getattr(logger, 'handler_set', None): ...@@ -33,6 +33,8 @@ if not getattr(logger, 'handler_set', None):
logger.handler_set = True logger.handler_set = True
retry=0
def install_manager(vm): def install_manager(vm):
try: try:
logger.info("Starting swarm manager installation on: "+(vm.ip)) logger.info("Starting swarm manager installation on: "+(vm.ip))
...@@ -58,9 +60,14 @@ def install_manager(vm): ...@@ -58,9 +60,14 @@ def install_manager(vm):
ret = retstr[2].encode() ret = retstr[2].encode()
logger.info("Finished swarm manager installation on: "+(vm.ip)) logger.info("Finished swarm manager installation on: "+(vm.ip))
except Exception as e: except Exception as e:
global retry
if 'Connection timed out' in str(e) and retry < 3:
retry+=1
install_manager(vm)
logger.error(vm.ip + " " + str(e)) logger.error(vm.ip + " " + str(e))
return "ERROR:" + vm.ip + " " + str(e) return "ERROR:" + vm.ip + " " + str(e)
ssh.close() ssh.close()
retry=0
return ret return ret
def install_worker(join_cmd, vm): def install_worker(join_cmd, vm):
...@@ -79,9 +86,13 @@ def install_worker(join_cmd, vm): ...@@ -79,9 +86,13 @@ def install_worker(join_cmd, vm):
stdout.read() stdout.read()
logger.info("Finished swarm worker installation on: "+(vm.ip)) logger.info("Finished swarm worker installation on: "+(vm.ip))
except Exception as e: except Exception as e:
if 'Connection timed out' in str(e) and retry < 3:
retry+=1
install_worker(vm)
logger.error(vm.ip + " " + str(e)) logger.error(vm.ip + " " + str(e))
return "ERROR:" + vm.ip + " " + str(e) return "ERROR:" + vm.ip + " " + str(e)
ssh.close() ssh.close()
retry=0
return "SUCCESS" return "SUCCESS"
def run(vm_list,rabbitmq_host,owner): def run(vm_list,rabbitmq_host,owner):
......
...@@ -110,7 +110,7 @@ def handleDelivery(message): ...@@ -110,7 +110,7 @@ def handleDelivery(message):
if manager_type == "kubernetes": if manager_type == "kubernetes":
ret = docker_kubernetes.run(vm_list) ret = docker_kubernetes.run(vm_list,rabbitmq_host,owner)
return ret return ret
elif manager_type == "swarm": elif manager_type == "swarm":
ret = docker_engine.run(vm_list,rabbitmq_host,owner) ret = docker_engine.run(vm_list,rabbitmq_host,owner)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment