Commit a31c0fb4 authored by Spiros Koulouzis's avatar Spiros Koulouzis

try to make deploy more reliable

parent 8ec65ca8
......@@ -22,4 +22,5 @@
/drip_parser/nbproject/private/
/drip_parser (copy)/nbproject/private/
/drip-provisioner (copy)/target/
/drip_parser.tar.gz
\ No newline at end of file
/drip_parser.tar.gz
/docs/playbooks/set_dockerhub_credentials.yml
\ No newline at end of file
# Stack definition for the stand-alone monitoring server.
version: "3.3"
services:
  monitor-server:
    image: salmant/ul_monitoring_server_container_image
    deploy:
      replicas: 1
      update_config:
        parallelism: 1
        delay: 10s
      restart_policy:
        condition: any
      placement:
        # Schedule only on swarm manager nodes.
        constraints: [node.role == manager]
    environment:
      # NOTE(review): fixed IP address; presumably the public address of the
      # monitoring host - confirm before reusing this file elsewhere.
      - MONITORING_SERVER=147.228.242.238
    # Port mappings are quoted so they are always read as strings.
    ports:
      - "8080:8080"
      - "4242:4242"
      - "4245:4245"
      - "7199:7199"
      - "7000:7000"
      - "7001:7001"
      - "9160:9160"
      - "9042:9042"
      - "8012:8012"
      - "61621:61621"
    hostname: monitor-server
########################################################################
# Currently, in order to monitor, you need a monitor-adapter container #
# on each Docker host with ports 4242, 4245 and 8125 published.        #
########################################################################
# Stack definition for the kamailio / rtpengine / MySQL services, all attached
# to the "uc-ol" overlay network declared at the bottom of this file.
version: "3.3"
services:
  kamailio:
    image: wtelecom/kamailio:switch
    deploy:
      replicas: 1
      update_config:
        parallelism: 1
        delay: 10s
      restart_policy:
        condition: any
      placement:
        # Schedule only on swarm manager nodes.
        constraints: [node.role == manager]
    depends_on:
      - db
      # The services below are commented out further down in this file, so
      # the dependencies on them are disabled too. Re-enable together.
      # - asterisk
      # - doubango
      # - monitor-adapter
    environment:
      # Matches the subnet of the uc-ol network defined below.
      - SUBNET_OVERLAY=192.168.2.0
    volumes:
      - /etc/hostname:/docker-entrypoint/hostname:ro
    # Ports are published in host mode (long syntax) rather than through the
    # swarm routing mesh.
    ports:
      - target: 5060
        published: 5060
        protocol: udp
        mode: host
      - target: 5061
        published: 5061
        protocol: udp
        mode: host
    hostname: kamailio
    networks:
      uc-ol:
        aliases:
          - kamailio

  rtpengine:
    image: wtelecom/rtpengine:switch
    deploy:
      replicas: 1
      update_config:
        parallelism: 1
        delay: 10s
      restart_policy:
        condition: any
    depends_on:
      - db
      - kamailio
      # monitor-adapter is commented out further down in this file.
      # - monitor-adapter
    environment:
      # PORT_MIN..PORT_MAX matches the UDP range published under "ports".
      - PORT_MAX=10050
      - PORT_MIN=10000
      - SUBNET_OVERLAY=192.168.2.0
      - BRIDGE_MODE=YES
    volumes:
      - /etc/hostname:/docker-entrypoint/hostname:ro
    # One host-mode entry per UDP port from 10000 to 10050 inclusive.
    ports:
      - target: 10000
        published: 10000
        protocol: udp
        mode: host
      - target: 10001
        published: 10001
        protocol: udp
        mode: host
      - target: 10002
        published: 10002
        protocol: udp
        mode: host
      - target: 10003
        published: 10003
        protocol: udp
        mode: host
      - target: 10004
        published: 10004
        protocol: udp
        mode: host
      - target: 10005
        published: 10005
        protocol: udp
        mode: host
      - target: 10006
        published: 10006
        protocol: udp
        mode: host
      - target: 10007
        published: 10007
        protocol: udp
        mode: host
      - target: 10008
        published: 10008
        protocol: udp
        mode: host
      - target: 10009
        published: 10009
        protocol: udp
        mode: host
      - target: 10010
        published: 10010
        protocol: udp
        mode: host
      - target: 10011
        published: 10011
        protocol: udp
        mode: host
      - target: 10012
        published: 10012
        protocol: udp
        mode: host
      - target: 10013
        published: 10013
        protocol: udp
        mode: host
      - target: 10014
        published: 10014
        protocol: udp
        mode: host
      - target: 10015
        published: 10015
        protocol: udp
        mode: host
      - target: 10016
        published: 10016
        protocol: udp
        mode: host
      - target: 10017
        published: 10017
        protocol: udp
        mode: host
      - target: 10018
        published: 10018
        protocol: udp
        mode: host
      - target: 10019
        published: 10019
        protocol: udp
        mode: host
      - target: 10020
        published: 10020
        protocol: udp
        mode: host
      - target: 10021
        published: 10021
        protocol: udp
        mode: host
      - target: 10022
        published: 10022
        protocol: udp
        mode: host
      - target: 10023
        published: 10023
        protocol: udp
        mode: host
      - target: 10024
        published: 10024
        protocol: udp
        mode: host
      - target: 10025
        published: 10025
        protocol: udp
        mode: host
      - target: 10026
        published: 10026
        protocol: udp
        mode: host
      - target: 10027
        published: 10027
        protocol: udp
        mode: host
      - target: 10028
        published: 10028
        protocol: udp
        mode: host
      - target: 10029
        published: 10029
        protocol: udp
        mode: host
      - target: 10030
        published: 10030
        protocol: udp
        mode: host
      - target: 10031
        published: 10031
        protocol: udp
        mode: host
      - target: 10032
        published: 10032
        protocol: udp
        mode: host
      - target: 10033
        published: 10033
        protocol: udp
        mode: host
      - target: 10034
        published: 10034
        protocol: udp
        mode: host
      - target: 10035
        published: 10035
        protocol: udp
        mode: host
      - target: 10036
        published: 10036
        protocol: udp
        mode: host
      - target: 10037
        published: 10037
        protocol: udp
        mode: host
      - target: 10038
        published: 10038
        protocol: udp
        mode: host
      - target: 10039
        published: 10039
        protocol: udp
        mode: host
      - target: 10040
        published: 10040
        protocol: udp
        mode: host
      - target: 10041
        published: 10041
        protocol: udp
        mode: host
      - target: 10042
        published: 10042
        protocol: udp
        mode: host
      - target: 10043
        published: 10043
        protocol: udp
        mode: host
      - target: 10044
        published: 10044
        protocol: udp
        mode: host
      - target: 10045
        published: 10045
        protocol: udp
        mode: host
      - target: 10046
        published: 10046
        protocol: udp
        mode: host
      - target: 10047
        published: 10047
        protocol: udp
        mode: host
      - target: 10048
        published: 10048
        protocol: udp
        mode: host
      - target: 10049
        published: 10049
        protocol: udp
        mode: host
      - target: 10050
        published: 10050
        protocol: udp
        mode: host
    networks:
      uc-ol:
        aliases:
          - rtpengine

  # Disabled service; remove the leading "# " on each line to re-enable.
  # asterisk:
  #   image: wtelecom/asterisk:switch
  #   deploy:
  #     replicas: 1
  #     update_config:
  #       parallelism: 1
  #       delay: 10s
  #     restart_policy:
  #       condition: any
  #   depends_on:
  #     - db
  #     - monitor-adapter
  #   hostname: asterisk
  #   networks:
  #     uc-ol:
  #       aliases:
  #         - asterisk

  # Disabled service; remove the leading "# " on each line to re-enable.
  # doubango:
  #   image: wtelecom/doubango:switch
  #   deploy:
  #     replicas: 1
  #     update_config:
  #       parallelism: 1
  #       delay: 10s
  #     restart_policy:
  #       condition: any
  #   depends_on:
  #     - db
  #     - monitor-adapter
  #   hostname: doubango
  #   networks:
  #     uc-ol:
  #       aliases:
  #         - doubango

  db:
    image: wtelecom/mysql:switch
    deploy:
      replicas: 1
      update_config:
        parallelism: 1
        delay: 10s
      restart_policy:
        condition: any
      placement:
        # Schedule only on swarm manager nodes.
        constraints: [node.role == manager]
    # monitor-adapter is commented out further down in this file, so the
    # dependency on it is disabled too. Re-enable together.
    # depends_on:
    #   - monitor-adapter
    environment:
      # NOTE(review): the root password is hard-coded in version control;
      # consider supplying it through a Docker secret or the environment.
      - MYSQL_ROOT_PASSWORD=supersecret
    volumes:
      - db-volume:/var/lib/mysql
    hostname: db
    networks:
      uc-ol:
        aliases:
          - db

  # Disabled service; remove the leading "# " on each line to re-enable.
  # monitor-server:
  #   image: salmant/ul_monitoring_server_container_image
  #   deploy:
  #     replicas: 1
  #     update_config:
  #       parallelism: 1
  #       delay: 10s
  #     restart_policy:
  #       condition: any
  #     placement:
  #       constraints: [node.role == manager]
  #   environment:
  #     - MONITORING_SERVER=monitor-server
  #   ports:
  #     - "8080:8080"
  #     - "4242:4242"
  #     - "4245:4245"
  #     - "7199:7199"
  #     - "7000:7000"
  #     - "7001:7001"
  #     - "9160:9160"
  #     - "9042:9042"
  #     - "8012:8012"
  #     - "61621:61621"
  #   hostname: monitor-server

  # Disabled service; remove the leading "# " on each line to re-enable.
  # monitor-adapter:
  #   image: beia/monitoring_agent
  #   deploy:
  #     replicas: 1
  #     update_config:
  #       parallelism: 1
  #       delay: 10s
  #     restart_policy:
  #       condition: any
  #     placement:
  #       constraints: [node.role == manager]
  #   environment:
  #     - MONITORING_SERVER=monitor-server
  #     - MONITORING_PREFIX=eu.switch.wt
  #   hostname: monitor-adapter
  #   networks:
  #     uc-ol:
  #       aliases:
  #         - monitor-adapter

# Named volume mounted by the db service at /var/lib/mysql.
volumes:
  db-volume: {}

# Overlay network shared by every service above.
networks:
  uc-ol:
    driver: overlay
    ipam:
      driver: default
      config:
        - subnet: 192.168.2.0/24
......@@ -297,9 +297,12 @@ public class DeployService {
MessageParameter configurationParameter = createConfigurationParameter(configurationID, "composer");
Map<String, String> attributes = new HashMap<>();
attributes.put("name", configurationID);
// attributes.put("docker_login_username", dockerLogin.get("username"));
// attributes.put("docker_login_password", dockerLogin.get("password"));
// attributes.put("docker_login_registry", dockerLogin.get("registry"));
if (dockerLogin != null) {
attributes.put("docker_login_username", dockerLogin.get("username"));
attributes.put("docker_login_password", dockerLogin.get("password"));
attributes.put("docker_login_registry", dockerLogin.get("registry"));
}
configurationParameter.setAttributes(attributes);
return configurationParameter;
}
......
......@@ -39,6 +39,10 @@ import yaml
import sys
from results_collector import ResultsCollector
from drip_logging.drip_logging_handler import *
import multiprocessing
from ansible.executor.task_executor import TaskExecutor
from ansible.playbook import Playbook
logger = logging.getLogger(__name__)
if not getattr(logger, 'handler_set', None):
......@@ -53,7 +57,7 @@ if not getattr(logger, 'handler_set', None):
retry=0
def install_prerequisites(vm):
def install_prerequisites(vm,return_dict):
try:
logger.info("Installing ansible prerequisites on: "+vm.ip)
ssh = paramiko.SSHClient()
......@@ -77,13 +81,49 @@ def install_prerequisites(vm):
if retry < 10:
logger.warning(vm.ip + " " + str(e)+". Retrying")
retry+=1
return install_prerequisites(vm)
return install_prerequisites(vm,return_dict)
logger.error(vm.ip + " " + str(e))
return_dict[vm.ip] = "ERROR:"+vm.ip+" "+str(e)
return "ERROR:"+vm.ip+" "+str(e)
ssh.close()
return_dict[vm.ip] = "SUCCESS"
return "SUCCESS"
def run_faied(failed_tasks,inventory,variable_manager,loader,options,passwords,results_callback,playbook_path):
    """Collect the playbook tasks that failed so that they can be re-run.

    Each entry of ``failed_tasks`` is expected to expose ``_task.get_name()``
    and ``_host.get_name()``. The playbook at ``playbook_path`` is parsed and
    every task whose ``name`` contains the failed task's name is gathered,
    together with the hosts on which such a task failed.

    The actual re-run is still disabled (see the commented-out section at the
    end), so ``inventory``, ``variable_manager``, ``loader``, ``options``,
    ``passwords`` and ``results_callback`` are currently unused; they are kept
    so that callers do not need to change once the re-run is enabled.

    Returns:
        dict: ``{'hosts': [...], 'tasks': [...]}`` describing what to retry.
    """
    with open(playbook_path) as stream:
        # safe_load: a playbook is plain data, so there is no reason to allow
        # arbitrary object construction. An empty file yields None, hence the
        # fallback to an empty list of plays.
        yml_plays = yaml.safe_load(stream) or []

    retry_task = []
    hosts = []
    for failed_task in failed_tasks:
        name = failed_task._task.get_name()
        host = failed_task._host.get_name()
        matched = False
        for play in yml_plays:
            # A play may define no 'tasks' section and a task may have no
            # 'name'; neither case should abort the collection.
            for task in play.get('tasks') or []:
                if name in task.get('name', ''):
                    matched = True
                    if task not in retry_task:
                        retry_task.append(task)
        # Only hosts that actually have something to retry are recorded, and
        # each of them only once.
        if matched and host not in hosts:
            hosts.append(host)

    failed_yml = {}
    failed_yml['hosts'] = hosts
    failed_yml['tasks'] = retry_task

    # Re-run of the collected tasks is not enabled yet. When enabling it,
    # create the TaskQueueManager here and release it with tqm.cleanup()
    # once the play has finished.
    #tqm = TaskQueueManager(inventory=inventory, variable_manager=variable_manager, loader=loader, options=options, passwords=passwords)
    #with open('/tmp/failed.yml', 'w') as outfile:
        #yaml.dump(failed_yml, outfile, default_flow_style=False)
    #play = Play().load('/tmp/failed.yml', variable_manager=variable_manager, loader=loader)
    #res = tqm.run(play=play)
    return failed_yml
def execute_playbook(hosts, playbook_path,user,ssh_key_file,extra_vars,passwords):
if not os.path.exists(playbook_path):
logger.error('[ERROR] The playbook does not exist')
......@@ -91,15 +131,20 @@ def execute_playbook(hosts, playbook_path,user,ssh_key_file,extra_vars,passwords
os.environ['ANSIBLE_HOST_KEY_CHECKING'] = 'false'
ansible.constants.HOST_KEY_CHECKING = False
os.environ['ANSIBLE_SSH_RETRIES'] = '20'
ansible.constants.ANSIBLE_SSH_RETRIES = 20
variable_manager = VariableManager()
loader = DataLoader()
inventory = Inventory(loader=loader, variable_manager=variable_manager, host_list=hosts)
Options = namedtuple('Options', ['listtags', 'listtasks', 'listhosts', 'syntax', 'connection','module_path', 'forks', 'remote_user', 'private_key_file', 'ssh_common_args', 'ssh_extra_args', 'sftp_extra_args', 'scp_extra_args', 'become', 'become_method', 'become_user', 'verbosity', 'check','host_key_checking'])
Options = namedtuple('Options', ['listtags', 'listtasks', 'listhosts', 'syntax', 'connection','module_path', 'forks', 'remote_user', 'private_key_file', 'ssh_common_args', 'ssh_extra_args', 'sftp_extra_args', 'scp_extra_args', 'become', 'become_method', 'become_user', 'verbosity', 'check','host_key_checking','retries'])
options = Options(listtags=False, listtasks=False, listhosts=False, syntax=False, connection='smart', module_path=None, forks=None, remote_user=user, private_key_file=ssh_key_file, ssh_common_args='', ssh_extra_args='', sftp_extra_args=None, scp_extra_args=None, become=True, become_method='sudo', become_user='root', verbosity=None, check=False , host_key_checking=False)
options = Options(listtags=False, listtasks=False, listhosts=False, syntax=False, connection='smart', module_path=None, forks=None, remote_user=user, private_key_file=ssh_key_file, ssh_common_args='', ssh_extra_args='', sftp_extra_args=None, scp_extra_args=None, become=True, become_method='sudo', become_user='root', verbosity=None, check=False , host_key_checking=False, retries=20)
variable_manager.extra_vars = extra_vars
......@@ -114,40 +159,70 @@ def execute_playbook(hosts, playbook_path,user,ssh_key_file,extra_vars,passwords
pbex._tqm._stdout_callback = results_callback
results = pbex.run()
ok = results_callback.host_ok
answer = []
for res in ok:
failed_tasks = []
for res in ok:
failed_tasks.append(res['result'])
resp = json.dumps({"host":res['ip'], "result":res['result']._result,"task":res['task']})
logger.info(resp)
#logger.info(resp)
answer.append({"host":res['ip'], "result":res['result']._result,"task":res['task']})
unreachable = results_callback.host_unreachable
unreachable = results_callback.host_unreachable
for res in unreachable:
failed_tasks.append(res['task'])
resp = json.dumps({"host":res['ip'], "result":res['result']._result,"task":res['task']})
logger.info(resp)
answer.append({"host":res['ip'], "result":res['result']._result,"task":res['task']})
host_failed = results_callback.host_failed
for res in host_failed:
resp = json.dumps({"host":res['ip'], "result":res['result']._result, "task":res['task']})
logger.info(resp)
#logger.info(resp)
answer.append({"host":res['ip'], "result":res['result']._result,"task":res['task']})
if failed_tasks:
run_faied(failed_tasks,inventory,variable_manager,loader,options,passwords,results_callback,playbook_path)
return json.dumps(answer)
def run(vm_list,playbook_path,rabbitmq_host,owner):
#Create /playbook.retry
hosts=""
ssh_key_file=""
rabbit = DRIPLoggingHandler(host=rabbitmq_host, port=5672,user=owner)
logger.addHandler(rabbit)
manager = multiprocessing.Manager()
return_dict = manager.dict()
jobs = []
for vm in vm_list:
ret = install_prerequisites(vm)
hosts+=vm.ip+","
ssh_key_file = vm.key
user = vm.user
if "ERROR" in ret: return ret
#ret = install_prerequisites(vm)
#p = multiprocessing.Process(target=install_prerequisites, args=(vm,return_dict,))
#jobs.append(p)
#p.start()
hosts+=vm.ip+","
ssh_key_file = vm.key
user = vm.user
#for proc in jobs:
#proc.join()
#if "ERROR" in return_dict.values(): return "ERROR"
extra_vars = {}
passwords = {}
......
......@@ -47,9 +47,9 @@ def deploy_compose(vm, compose_file, compose_name,docker_login):
sftp.put(compose_file, "docker-compose.yml")
if(docker_login):
stdin, stdout, stderr = ssh.exec_command("docker login -u "+docker_login['username']+" -p "+docker_login['password']+" "+docker_login['registry']+" && sudo sudo docker stack deploy --compose-file /tmp/docker-compose.yml %s" % (compose_name))
stdin, stdout, stderr = ssh.exec_command("sudo docker login -u "+docker_login['username']+" -p "+docker_login['password']+" "+docker_login['registry']+" && sudo docker stack deploy --with-registry-auth --compose-file /tmp/docker-compose.yml %s" % (compose_name))
else:
stdin, stdout, stderr = ssh.exec_command("sudo docker stack deploy --compose-file /tmp/docker-compose.yml %s" % (compose_name))
stdin, stdout, stderr = ssh.exec_command("sudo docker stack deploy --with-registry-auth --compose-file /tmp/docker-compose.yml %s" % (compose_name))
stdout.read()
logger.info("Finished docker compose deployment on: "+vm.ip)
except Exception as e:
......@@ -57,7 +57,7 @@ def deploy_compose(vm, compose_file, compose_name,docker_login):
if retry < 10:
logger.warning(vm.ip + " " + str(e)+". Retrying")
retry+=1
return deploy_compose(vm, compose_file, compose_name)
return deploy_compose(vm, compose_file, compose_name,docker_login)
logger.error(vm.ip + " " + str(e))
return "ERROR:" + vm.ip + " " + str(e)
ssh.close()
......
......@@ -66,7 +66,7 @@ def install_engine(vm,return_dict):
if retry < 10:
logger.warning(vm.ip + " " + str(e)+". Retrying")
retry+=1
return install_engine(vm,procnum)
return install_engine(vm,return_dict)
logger.error(vm.ip + " " + str(e))
return_dict[vm.ip] = "ERROR:"+vm.ip+" "+str(e)
......
......@@ -96,7 +96,7 @@ def install_worker(join_cmd, vm,return_dict):
if retry < 10:
logger.warning(vm.ip + " " + str(e)+". Retrying")
retry+=1
return install_worker(join_cmd, vm)
return install_worker(join_cmd, vm,return_dict)
logger.error(vm.ip + " " + str(e))
return_dict[vm.ip] = "ERROR:"+vm.ip+" "+str(e)
return "ERROR:" + vm.ip + " " + str(e)
......
CESNET&&https://carach5.ics.muni.cz:11443&&ubuntu 14.04&&medium&&http://fedcloud.egi.eu/occi/compute/flavour/1.0#medium&&http://occi.carach5.ics.muni.cz/occi/infrastructure/os_tpl#uuid_38d42ca1_f4e9_5b5c_98de_37eb2d26301a_warg_default_shared_218&&ubuntu
CESNET&&https://carach5.ics.muni.cz:11443&&ubuntu 14.04&&extra_large&&http://schemas.fedcloud.egi.eu/occi/infrastructure/resource_tpl#extra_large&&http://occi.carach5.ics.muni.cz/occi/infrastructure/os_tpl#uuid_38d42ca1_f4e9_5b5c_98de_37eb2d26301a_warg_default_shared_218&&ubuntu
CESNET&&https://carach5.ics.muni.cz:11443&&ubuntu 16.04&&medium&&http://fedcloud.egi.eu/occi/compute/flavour/1.0#medium&&http://occi.carach5.ics.muni.cz/occi/infrastructure/os_tpl#uuid_8df7ba00_8467_57aa_bf1e_05754a2a73bf_warg_default_shared_219&&ubuntu
CESNET&&https://carach5.ics.muni.cz:11443&&ubuntu 16.04&&medium&&http://fedcloud.egi.eu/occi/compute/flavour/1.0#mem_large&&http://occi.carach5.ics.muni.cz/occi/infrastructure/os_tpl#uuid_8df7ba00_8467_57aa_bf1e_05754a2a73bf_warg_default_shared_219&&ubuntu
CESNET&&https://carach5.ics.muni.cz:11443&&ubuntu 16.04&&mem_large&&http://fedcloud.egi.eu/occi/compute/flavour/1.0#mem_large&&http://occi.carach5.ics.muni.cz/occi/infrastructure/os_tpl#uuid_8df7ba00_8467_57aa_bf1e_05754a2a73bf_warg_default_shared_219&&ubuntu
CESNET&&https://carach5.ics.muni.cz:11443&&ubuntu 16.04&&extra_large&&http://schemas.fedcloud.egi.eu/occi/infrastructure/resource_tpl#extra_large&&http://occi.carach5.ics.muni.cz/occi/infrastructure/os_tpl#uuid_8df7ba00_8467_57aa_bf1e_05754a2a73bf_warg_default_shared_219&&ubuntu
CESNET&&https://carach5.ics.muni.cz:11443&&ubuntu 16.04&&small&&http://fedcloud.egi.eu/occi/compute/flavour/1.0#small&&http://occi.carach5.ics.muni.cz/occi/infrastructure/os_tpl#uuid_8df7ba00_8467_57aa_bf1e_05754a2a73bf_warg_default_shared_219&&ubuntu
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment