Coverage for sfkit/utils/gwas_protocol.py: 100%
147 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-07 15:11 -0400
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-07 15:11 -0400
1import fileinput
2import multiprocessing
3import os
4import time
6from google.cloud import storage
7from sfkit.utils import constants
8from sfkit.api import get_doc_ref_dict, update_firestore, website_send_file
9from sfkit.utils.helper_functions import (
10 copy_results_to_cloud_storage,
11 copy_to_out_folder,
12 plot_assoc,
13 postprocess_assoc,
14 run_command,
15)
def run_gwas_protocol(role: str, demo: bool = False) -> None:
    """Drive the full GWAS workflow for one party.

    Installs and compiles the toolchain unless the environment is flagged as
    Docker or script-installed, prepares parameters and data for real (non-demo)
    runs, then runs the data sharing step followed by the GWAS step.

    Args:
        role: Party index as a string (e.g. "0", "1", "2").
        demo: When True, skip parameter/data preparation and syncing and run
            the bundled example scripts instead.
    """
    print("\n\n Begin running GWAS protocol \n\n")

    toolchain_ready = constants.IS_DOCKER or constants.IS_INSTALLED_VIA_SCRIPT
    if not toolchain_ready:
        install_gwas_dependencies()
        install_gwas_repo()
        install_ntl_library()
        compile_gwas_code()

    if not demo:
        data_folder = constants.ENCRYPTED_DATA_FOLDER
        update_parameters(role)
        # connect_to_other_vms(role)
        prepare_data(data_folder, role)
        copy_data_to_gwas_repo(data_folder, role)
        sync_with_other_vms(role)

    start_datasharing(role, demo)
    start_gwas(role, demo)
def install_gwas_dependencies() -> None:
    """Install the apt packages and numpy that the GWAS build relies on.

    Reports the task to Firestore, then passes each install command to
    ``run_command`` one at a time, in order.
    """
    update_firestore("update_firestore::task=Installing dependencies")
    print("\n\n Begin installing dependencies \n\n")

    install_steps = (
        "sudo apt-get --assume-yes update",
        "sudo apt-get --assume-yes install build-essential",
        "sudo apt-get --assume-yes install clang-3.9",
        "sudo apt-get --assume-yes install libgmp3-dev",
        "sudo apt-get --assume-yes install libssl-dev",
        "sudo apt-get --assume-yes install libsodium-dev",
        "sudo apt-get --assume-yes install libomp-dev",
        "sudo apt-get --assume-yes install netcat",
        "sudo apt-get --assume-yes install git",
        "sudo apt-get --assume-yes install python3-pip",
        "sudo pip3 install numpy",
    )
    for step in install_steps:
        run_command(step)

    print("\n\n Finished installing dependencies \n\n")
def install_gwas_repo() -> None:
    """Clone the secure-gwas repository into ``./secure-gwas``."""
    update_firestore("update_firestore::task=Installing GWAS repo")
    print("\n\n Begin installing GWAS repo \n\n")
    run_command("git clone https://github.com/hcholab/secure-gwas secure-gwas")
    print("\n\n Finished installing GWAS repo \n\n")
def install_ntl_library() -> None:
    """Download, patch, build and install NTL 10.3.0.

    The ``ZZ.h`` / ``ZZ.cpp`` files from the cloned secure-gwas repo are copied
    over the stock NTL sources before configuring and building.
    """
    update_firestore("update_firestore::task=Installing NTL library")
    print("\n\n Begin installing NTL library \n\n")

    build_steps = (
        "curl https://libntl.org/ntl-10.3.0.tar.gz --output ntl-10.3.0.tar.gz",
        "tar -zxvf ntl-10.3.0.tar.gz",
        "cp secure-gwas/code/NTL_mod/ZZ.h ntl-10.3.0/include/NTL/",
        "cp secure-gwas/code/NTL_mod/ZZ.cpp ntl-10.3.0/src/",
        "cd ntl-10.3.0/src && ./configure NTL_THREAD_BOOST=on",
        "cd ntl-10.3.0/src && make all",
        "cd ntl-10.3.0/src && sudo make install",
    )
    for step in build_steps:
        run_command(step)

    print("\n\n Finished installing NTL library \n\n")
def compile_gwas_code() -> None:
    """Point the secure-gwas Makefile at the local toolchain and build it.

    A single shell command rewrites the ``CPP``, ``INCPATHS`` and ``LDPATH``
    lines of the Makefile with ``sed`` and then runs ``sudo make``.
    """
    update_firestore("update_firestore::task=Compiling GWAS code")
    print("\n\n Begin compiling GWAS code \n\n")

    # One chained command so the `cd` and the COMP variable apply to every step.
    build_command = (
        "cd secure-gwas/code && COMP=$(which clang++) &&"
        ' sed -i "s|^CPP.*$|CPP = ${COMP}|g" Makefile &&'
        ' sed -i "s|^INCPATHS.*$|INCPATHS = -I/usr/local/include|g" Makefile &&'
        ' sed -i "s|^LDPATH.*$|LDPATH = -L/usr/local/lib|g" Makefile &&'
        " sudo make"
    )
    run_command(build_command)

    print("\n\n Finished compiling GWAS code \n\n")
def update_parameters(role: str) -> None:
    """Rewrite this party's secure-gwas parameter file with the study settings.

    Collects the shared, advanced and per-participant values from the study
    document, adds the local thread count plus every participant's IP address
    and ports, then replaces the value of each matching key in
    ``secure-gwas/par/test.par.<role>.txt`` in place.

    Blocks (polling every 5 seconds) while any participant's IP address is
    still an empty string.

    Args:
        role: Party index as a string; selects which parameter file is edited.
    """
    print(f"\n\n Updating parameters in 'secure-gwas/par/test.par.{role}.txt'\n\n")

    doc_ref_dict = get_doc_ref_dict()

    # shared parameters and advanced parameters
    pars = {**doc_ref_dict["parameters"], **doc_ref_dict["advanced_parameters"]}

    # individual parameters (participant index 0 is skipped)
    for i in range(1, len(doc_ref_dict["participants"])):
        pars[f"NUM_INDS_SP_{i}"] = doc_ref_dict["personal_parameters"][doc_ref_dict["participants"][i]]["NUM_INDS"]

    # total individuals = participant 1 + participant 2
    pars["NUM_INDS"] = {
        "value": str(int(pars["NUM_INDS_SP_1"]["value"]) + int(pars["NUM_INDS_SP_2"]["value"]))
    }

    # num threads = num_cpus = $(nproc)
    num_cpus = str(multiprocessing.cpu_count())
    pars["NUM_THREADS"] = {"value": num_cpus}
    update_firestore(f"update_firestore::NUM_THREADS={num_cpus}")
    update_firestore(f"update_firestore::NUM_CPUS={num_cpus}")

    # update pars with ipaddresses and ports
    for i in range(len(doc_ref_dict["participants"])):
        ip = doc_ref_dict["personal_parameters"][doc_ref_dict["participants"][i]]["IP_ADDRESS"]["value"]
        while ip == "":
            print(f"IP address for {doc_ref_dict['participants'][i]} is empty. Waiting...")
            time.sleep(5)

            # re-read the study document to pick up a newly published address
            doc_ref_dict = get_doc_ref_dict()
            ip = doc_ref_dict["personal_parameters"][doc_ref_dict["participants"][i]]["IP_ADDRESS"]["value"]

        pars[f"IP_ADDR_P{i}"] = {"value": ip}

        ports = doc_ref_dict["personal_parameters"][doc_ref_dict["participants"][i]]["PORTS"]["value"]
        port_list = ports.split(",")
        for j in range(i + 1, 3):
            pars[f"PORT_P{i}_P{j}"] = {"value": port_list[j]}

    par_path = f"{constants.EXECUTABLES_PREFIX}secure-gwas/par/test.par.{role}.txt"
    # With inplace=True, print() writes into the file being rewritten; the
    # context manager restores stdout and closes the file even if a line fails.
    with fileinput.input(par_path, inplace=True) as par_file:
        for line in par_file:
            key = str(line).split(" ")[0]
            if key in pars:
                line = f"{key} " + str(pars[key]["value"]) + "\n"
            print(line, end="")
133# this function currently has 2 main problems: 1. the other machine doesn't consistently receive the connection 2. it uses the same ports as the main protocol, causing a conflict
134# def connect_to_other_vms(role: str) -> None:
135# print("\n\n Begin connecting to other VMs \n\n")
137# doc_ref_dict: dict = get_doc_ref_dict()
139# for i in range(int(role)):
140# other_user = doc_ref_dict["participants"][i]
141# ip_address = doc_ref_dict["personal_parameters"][other_user]["IP_ADDRESS"]["value"]
142# port = int(doc_ref_dict["personal_parameters"][other_user]["PORTS"]["value"].split(",")[int(role)])
144# while ip_address == "" or port == "":
145# print(f"Waiting for {other_user} to finish setting up...")
146# time.sleep(5)
147# doc_ref_dict = get_doc_ref_dict()
148# ip_address = doc_ref_dict["personal_parameters"][other_user]["IP_ADDRESS"]["value"]
149# port = int(doc_ref_dict["personal_parameters"][other_user]["PORTS"]["value"].split(",")[int(role)])
151# print(f"Connecting to {other_user} at {ip_address}:{port}...")
153# while True:
154# try:
155# sock = socket.create_connection((ip_address, port), timeout=5)
156# sock.close()
157# break
158# except socket.timeout:
159# print(f"Timed out while connecting to {other_user} at {ip_address}:{port}. Trying again...")
160# except socket.error:
161# print(f"Error while connecting to {other_user} at {ip_address}:{port}. Trying again...")
162# time.sleep(5)
163# print("\n\n Finished connecting to other VMs \n\n")
def prepare_data(data_path: str, role: str) -> None:
    """Fetch the study's ``pos.txt`` for role "0".

    For role "0", creates ``data_path`` and downloads ``<study title>/pos.txt``
    from the ``sfkit`` storage bucket into it. Other roles only read the study
    document and do nothing else here.

    Args:
        data_path: Directory that should receive ``pos.txt``.
        role: Party index as a string.
    """
    doc_ref_dict: dict = get_doc_ref_dict()
    study_title: str = doc_ref_dict["title"]

    if role == "0":
        # Quote the path the same way copy_data_to_gwas_repo does, so a path
        # containing spaces is not split by the shell.
        run_command(f"mkdir -p '{data_path}'")
        storage.Client().bucket("sfkit").blob(f"{study_title}/pos.txt").download_to_filename(f"{data_path}/pos.txt")
def copy_data_to_gwas_repo(
    data_path: str, role: str
) -> None:  # TODO: change the path in parameter file instead? Or move instead of copy?
    """Copy this party's input files into ``secure-gwas/test_data``.

    Role "0" copies only ``pos.txt``; every other role also copies the
    genotype, missingness, phenotype and shared-key binaries.

    Args:
        data_path: Directory holding the prepared input files.
        role: Party index as a string.
    """
    print("\n\n Copy data to GWAS repo \n\n")

    if role == "0":
        filenames = ["pos.txt"]
    else:
        filenames = ["g.bin", "m.bin", "p.bin", "other_shared_key.bin", "pos.txt"]

    for filename in filenames:
        run_command(f"cp '{data_path}'/{filename} secure-gwas/test_data/{filename}")

    print("\n\n Finished copying data to GWAS repo \n\n")
def sync_with_other_vms(role: str) -> None:
    """Block until every participant reports the "syncing up" status.

    Publishes this party's own status first, then polls the study document
    every 5 seconds. Once all statuses match, sleeps a further
    ``15 + 15 * int(role)`` seconds before returning.

    Args:
        role: Party index as a string; higher roles wait longer at the end.
    """
    update_firestore("update_firestore::status=syncing up")
    update_firestore("update_firestore::task=Syncing up machines")
    print("Begin syncing up")

    def everyone_is_syncing() -> bool:
        # wait until all participants have the status of starting data sharing protocol
        current_statuses = get_doc_ref_dict()["status"].values()
        return all(entry == "syncing up" for entry in current_statuses)

    while not everyone_is_syncing():
        print("Waiting for all participants to sync up...")
        time.sleep(5)

    # role-dependent delay; presumably staggers the parties' start-up -- confirm
    time.sleep(15 + 15 * int(role))
    print("Finished syncing up")
def start_datasharing(role: str, demo: bool) -> None:
    """Run the secure-gwas data sharing step.

    In demo mode the bundled ``run_example_datasharing.sh`` script is executed.
    Otherwise ``bin/DataSharingClient`` is launched with this party's parameter
    file, and every role except "0" additionally passes ``../test_data/``.

    Args:
        role: Party index as a string.
        demo: Whether to run the example script instead of the real client.
    """
    update_firestore("update_firestore::task=Performing data sharing protocol")
    print("\n\n starting data sharing protocol \n\n")

    code_dir = f"{constants.EXECUTABLES_PREFIX}secure-gwas/code"
    if demo:
        command = f"cd {code_dir} && bash run_example_datasharing.sh"
    else:
        data_dir_arg = "" if role == "0" else " ../test_data/"
        command = (
            f"export PYTHONUNBUFFERED=TRUE && cd {code_dir}"
            f" && bin/DataSharingClient '{role}' ../par/test.par.'{role}'.txt{data_dir_arg}"
        )

    run_command(command, fail_message="Failed MPC-GWAS data sharing protocol")
    print("\n\n Finished data sharing protocol\n\n")
def start_gwas(role: str, demo: bool) -> None:
    """Run the GWAS step and finish the protocol for this party.

    Sleeps ``100 + 30 * int(role)`` seconds, runs either the example script
    (demo) or ``bin/GwasClient``, then post-processes the output for every
    role except "0", which only marks the protocol as finished.

    Args:
        role: Party index as a string.
        demo: Whether to run the example script instead of the real client.
    """
    update_firestore("update_firestore::task=Performing GWAS protocol")
    print("Sleeping before starting GWAS")
    time.sleep(100 + 30 * int(role))

    print("\n\n starting GWAS \n\n")
    update_firestore("update_firestore::status=starting GWAS")

    code_dir = f"{constants.EXECUTABLES_PREFIX}secure-gwas/code"
    if demo:
        command = f"cd {code_dir} && bash run_example_gwas.sh"
    else:
        command = (
            f"export PYTHONUNBUFFERED=TRUE && cd {code_dir}"
            f" && bin/GwasClient '{role}' ../par/test.par.'{role}'.txt"
        )
    run_command(command, fail_message="Failed MPC-GWAS protocol")
    print("\n\n Finished GWAS \n\n")

    if role == "0":
        # role 0 has no result files to process here
        update_firestore("update_firestore::status=Finished protocol!")
        return
    process_output_files(role, demo)
def process_output_files(role: str, demo: bool) -> None:
    """Post-process, plot and distribute the GWAS results for this party.

    Builds ``new_assoc.txt`` and ``manhattan.png`` in ``secure-gwas/out``,
    copies the output folder, optionally copies results to cloud storage and
    to the website depending on the participant's settings, and finally marks
    the protocol as finished.

    Args:
        role: Party index as a string; used to look up this participant.
        demo: When True, a fixed total of 1,000 individuals is used instead of
            summing the participants' ``NUM_INDS`` values.
    """
    out_dir = f"{constants.EXECUTABLES_PREFIX}secure-gwas/out"
    assoc_path = f"{out_dir}/new_assoc.txt"
    plot_path = f"{out_dir}/manhattan.png"

    study = get_doc_ref_dict()
    num_inds_total = (
        1_000
        if demo
        else sum(int(study["personal_parameters"][user]["NUM_INDS"]["value"]) for user in study["participants"])
    )
    num_covs = int(study["parameters"]["NUM_COVS"]["value"])

    postprocess_assoc(
        assoc_path,
        f"{out_dir}/test_assoc.txt",
        f"{constants.EXECUTABLES_PREFIX}secure-gwas/test_data/pos.txt",
        f"{out_dir}/test_gkeep1.txt",
        f"{out_dir}/test_gkeep2.txt",
        num_inds_total,
        num_covs,
    )
    plot_assoc(plot_path, assoc_path)

    # The study document is read again before looking up this user's settings.
    study = get_doc_ref_dict()
    user_id: str = study["participants"][int(role)]
    user_settings = study["personal_parameters"][user_id]

    copy_to_out_folder([out_dir])

    results_path = user_settings.get("RESULTS_PATH", {}).get("value", "")
    if results_path:
        copy_results_to_cloud_storage(role, results_path, out_dir)

    send_results: str = user_settings.get("SEND_RESULTS", {}).get("value")
    if send_results == "Yes":
        with open(assoc_path, "r") as assoc_file:
            website_send_file(assoc_file, "new_assoc.txt")

        with open(plot_path, "rb") as plot_file:
            website_send_file(plot_file, "manhattan.png")

    update_firestore("update_firestore::status=Finished protocol!")