Coverage for sfkit/utils/gwas_protocol.py: 100%

147 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-07 15:11 -0400

1import fileinput 

2import multiprocessing 

3import os 

4import time 

5 

6from google.cloud import storage 

7from sfkit.utils import constants 

8from sfkit.api import get_doc_ref_dict, update_firestore, website_send_file 

9from sfkit.utils.helper_functions import ( 

10 copy_results_to_cloud_storage, 

11 copy_to_out_folder, 

12 plot_assoc, 

13 postprocess_assoc, 

14 run_command, 

15) 

16 

17 

def run_gwas_protocol(role: str, demo: bool = False) -> None:
    """Drive the GWAS protocol from installation through to the GWAS run.

    Installation and compilation are skipped when running in Docker or when
    sfkit was installed via the install script. Study-specific preparation
    (parameters, data, syncing with the other machines) is skipped in demo
    mode. Data sharing and the GWAS itself always run.

    Args:
        role: This party's role, as a string index (e.g. "0", "1", "2").
        demo: When True, run the bundled example instead of a real study.
    """
    print("\n\n Begin running GWAS protocol \n\n")

    if not constants.IS_DOCKER and not constants.IS_INSTALLED_VIA_SCRIPT:
        install_gwas_dependencies()
        install_gwas_repo()
        install_ntl_library()
        compile_gwas_code()

    if not demo:
        update_parameters(role)
        # connect_to_other_vms(role)
        data_folder = constants.ENCRYPTED_DATA_FOLDER
        prepare_data(data_folder, role)
        copy_data_to_gwas_repo(data_folder, role)
        sync_with_other_vms(role)

    start_datasharing(role, demo)
    start_gwas(role, demo)

33 

34 

def install_gwas_dependencies() -> None:
    """Install the system packages and Python library the GWAS code needs.

    Each command is handed to ``run_command`` on its own, in order: an
    ``apt-get update``, one ``apt-get install`` per package, then
    ``pip3 install numpy``.
    """
    update_firestore("update_firestore::task=Installing dependencies")
    print("\n\n Begin installing dependencies \n\n")

    # An explicit tuple (rather than splitting an indented multi-line string)
    # keeps source indentation out of the commands that get executed.
    commands = (
        "sudo apt-get --assume-yes update",
        "sudo apt-get --assume-yes install build-essential",
        "sudo apt-get --assume-yes install clang-3.9",
        "sudo apt-get --assume-yes install libgmp3-dev",
        "sudo apt-get --assume-yes install libssl-dev",
        "sudo apt-get --assume-yes install libsodium-dev",
        "sudo apt-get --assume-yes install libomp-dev",
        "sudo apt-get --assume-yes install netcat",
        "sudo apt-get --assume-yes install git",
        "sudo apt-get --assume-yes install python3-pip",
        "sudo pip3 install numpy",
    )
    for command in commands:
        run_command(command)

    print("\n\n Finished installing dependencies \n\n")

52 

53 

def install_gwas_repo() -> None:
    """Clone the secure-gwas repository into ``./secure-gwas``."""
    update_firestore("update_firestore::task=Installing GWAS repo")
    print("\n\n Begin installing GWAS repo \n\n")
    run_command("git clone https://github.com/hcholab/secure-gwas secure-gwas")
    print("\n\n Finished installing GWAS repo \n\n")

60 

61 

def install_ntl_library() -> None:
    """Download, patch, build and install NTL 10.3.0.

    The tarball is fetched and unpacked, ``ZZ.h`` and ``ZZ.cpp`` from the
    secure-gwas checkout are copied over the NTL sources, and NTL is then
    configured, built and installed from ``ntl-10.3.0/src``.
    """
    update_firestore("update_firestore::task=Installing NTL library")
    print("\n\n Begin installing NTL library \n\n")

    # Every command goes through run_command separately, so each build step
    # repeats its own `cd` instead of relying on an earlier one.
    # An explicit tuple keeps source indentation out of the executed commands.
    commands = (
        "curl https://libntl.org/ntl-10.3.0.tar.gz --output ntl-10.3.0.tar.gz",
        "tar -zxvf ntl-10.3.0.tar.gz",
        "cp secure-gwas/code/NTL_mod/ZZ.h ntl-10.3.0/include/NTL/",
        "cp secure-gwas/code/NTL_mod/ZZ.cpp ntl-10.3.0/src/",
        "cd ntl-10.3.0/src && ./configure NTL_THREAD_BOOST=on",
        "cd ntl-10.3.0/src && make all",
        "cd ntl-10.3.0/src && sudo make install",
    )
    for command in commands:
        run_command(command)

    print("\n\n Finished installing NTL library \n\n")

75 

76 

def compile_gwas_code() -> None:
    """Point the secure-gwas Makefile at the local toolchain and build it.

    A single shell command chain changes into ``secure-gwas/code``, rewrites
    the ``CPP``, ``INCPATHS`` and ``LDPATH`` lines of the Makefile with
    ``sed``, and runs ``sudo make``.
    """
    update_firestore("update_firestore::task=Compiling GWAS code")
    print("\n\n Begin compiling GWAS code \n\n")

    # The steps must share one shell so that `cd` and $COMP carry over, hence
    # a single "&&"-joined command. Joining explicitly avoids depending on
    # backslash-newline continuations inside a string literal, which break
    # silently if any whitespace follows the backslash.
    steps = (
        "cd secure-gwas/code",
        "COMP=$(which clang++)",
        'sed -i "s|^CPP.*$|CPP = ${COMP}|g" Makefile',
        'sed -i "s|^INCPATHS.*$|INCPATHS = -I/usr/local/include|g" Makefile',
        'sed -i "s|^LDPATH.*$|LDPATH = -L/usr/local/lib|g" Makefile',
        "sudo make",
    )
    run_command(" && ".join(steps))

    print("\n\n Finished compiling GWAS code \n\n")

87 

88 

def update_parameters(role: str) -> None:
    """Rewrite this party's secure-gwas parameter file from the study document.

    Values are collected into ``pars`` (name -> ``{"value": ...}``) from the
    study's shared and advanced parameters, the per-participant ``NUM_INDS``
    entries, the local CPU count, and every participant's IP address and
    ports. Each line of ``test.par.<role>.txt`` whose first space-separated
    token is a key of ``pars`` is then replaced by ``"<key> <value>"``; all
    other lines are written back unchanged.

    Blocks (polling every 5 seconds) while any participant's IP address is
    still the empty string.

    Args:
        role: This party's role, used to select the parameter file.
    """
    print(f"\n\n Updating parameters in 'secure-gwas/par/test.par.{role}.txt'\n\n")

    doc_ref_dict = get_doc_ref_dict()

    # shared parameters and advanced parameters
    pars = {**doc_ref_dict["parameters"], **doc_ref_dict["advanced_parameters"]}

    # individual parameters (participant 0 is skipped)
    for i in range(1, len(doc_ref_dict["participants"])):
        pars[f"NUM_INDS_SP_{i}"] = doc_ref_dict["personal_parameters"][doc_ref_dict["participants"][i]]["NUM_INDS"]

    # NOTE(review): the total is hard-wired to participants 1 and 2 -- confirm
    # the protocol never runs with a different number of data holders.
    total_inds = int(pars["NUM_INDS_SP_1"]["value"]) + int(pars["NUM_INDS_SP_2"]["value"])
    pars["NUM_INDS"] = {"value": str(total_inds)}

    # num threads = num_cpus = $(nproc)
    num_cpus = str(multiprocessing.cpu_count())
    pars["NUM_THREADS"] = {"value": num_cpus}
    update_firestore(f"update_firestore::NUM_THREADS={num_cpus}")
    update_firestore(f"update_firestore::NUM_CPUS={num_cpus}")

    # update pars with ipaddresses and ports
    # The index loop is deliberate: doc_ref_dict is re-fetched while waiting,
    # so the participant must be looked up again by position each time.
    for i in range(len(doc_ref_dict["participants"])):
        ip = doc_ref_dict["personal_parameters"][doc_ref_dict["participants"][i]]["IP_ADDRESS"]["value"]
        while ip == "":
            print(f"IP address for {doc_ref_dict['participants'][i]} is empty. Waiting...")
            time.sleep(5)

            doc_ref_dict = get_doc_ref_dict()
            ip = doc_ref_dict["personal_parameters"][doc_ref_dict["participants"][i]]["IP_ADDRESS"]["value"]

        pars[f"IP_ADDR_P{i}"] = {"value": ip}

        ports = doc_ref_dict["personal_parameters"][doc_ref_dict["participants"][i]]["PORTS"]["value"]
        for j in range(i + 1, 3):
            pars[f"PORT_P{i}_P{j}"] = {"value": ports.split(",")[j]}

    par_file = f"{constants.EXECUTABLES_PREFIX}secure-gwas/par/test.par.{role}.txt"
    # With inplace=True, print() writes into the file being rewritten. The
    # context manager guarantees stdout is restored and the file closed even
    # if an exception is raised part-way through the rewrite.
    with fileinput.input(par_file, inplace=True) as par_lines:
        for line in par_lines:
            key = line.split(" ")[0]
            if key in pars:
                line = f"{key} " + str(pars[key]["value"]) + "\n"
            print(line, end="")

131 

132 

133# this function currently has 2 main problems: 1. the other machine doesn't consistently receive the connection 2. it uses the same ports as the main protocol, causing a conflict 

134# def connect_to_other_vms(role: str) -> None: 

135# print("\n\n Begin connecting to other VMs \n\n") 

136 

137# doc_ref_dict: dict = get_doc_ref_dict() 

138 

139# for i in range(int(role)): 

140# other_user = doc_ref_dict["participants"][i] 

141# ip_address = doc_ref_dict["personal_parameters"][other_user]["IP_ADDRESS"]["value"] 

142# port = int(doc_ref_dict["personal_parameters"][other_user]["PORTS"]["value"].split(",")[int(role)]) 

143 

144# while ip_address == "" or port == "": 

145# print(f"Waiting for {other_user} to finish setting up...") 

146# time.sleep(5) 

147# doc_ref_dict = get_doc_ref_dict() 

148# ip_address = doc_ref_dict["personal_parameters"][other_user]["IP_ADDRESS"]["value"] 

149# port = int(doc_ref_dict["personal_parameters"][other_user]["PORTS"]["value"].split(",")[int(role)]) 

150 

151# print(f"Connecting to {other_user} at {ip_address}:{port}...") 

152 

153# while True: 

154# try: 

155# sock = socket.create_connection((ip_address, port), timeout=5) 

156# sock.close() 

157# break 

158# except socket.timeout: 

159# print(f"Timed out while connecting to {other_user} at {ip_address}:{port}. Trying again...") 

160# except socket.error: 

161# print(f"Error while connecting to {other_user} at {ip_address}:{port}. Trying again...") 

162# time.sleep(5) 

163# print("\n\n Finished connecting to other VMs \n\n") 

164 

165 

def prepare_data(data_path: str, role: str) -> None:
    """Fetch ``pos.txt`` for role "0"; other roles do nothing further.

    The study document is always read for its title. Only for role "0" is
    ``data_path`` created and ``<title>/pos.txt`` downloaded into it from the
    "sfkit" storage bucket.

    Args:
        data_path: Directory that should receive ``pos.txt``.
        role: This party's role as a string.
    """
    study_title: str = get_doc_ref_dict()["title"]

    if role != "0":
        return

    run_command(f"mkdir -p {data_path}")
    pos_blob = storage.Client().bucket("sfkit").blob(f"{study_title}/pos.txt")
    pos_blob.download_to_filename(f"{data_path}/pos.txt")

173 

174 

def copy_data_to_gwas_repo(
    data_path: str, role: str
) -> None:  # TODO: change the path in parameter file instead? Or move instead of copy?
    """Copy this party's input files into ``secure-gwas/test_data``.

    Role "0" copies only ``pos.txt``. Every other role copies ``g.bin``,
    ``m.bin``, ``p.bin``, ``other_shared_key.bin`` and ``pos.txt``. Each file
    is copied with its own ``cp`` command passed to ``run_command``.

    Args:
        data_path: Directory holding the files to copy.
        role: This party's role as a string.
    """
    print("\n\n Copy data to GWAS repo \n\n")

    # Pick the file list for this role up front instead of building the full
    # command text and then discarding it for role "0".
    if role == "0":
        filenames = ("pos.txt",)
    else:
        filenames = ("g.bin", "m.bin", "p.bin", "other_shared_key.bin", "pos.txt")

    for filename in filenames:
        run_command(f"cp '{data_path}'/{filename} secure-gwas/test_data/{filename}")

    print("\n\n Finished copying data to GWAS repo \n\n")

191 

192 

def sync_with_other_vms(role: str) -> None:
    """Wait until every participant reports the "syncing up" status.

    Publishes this party's own status and task, then polls the study document
    every 5 seconds until all entries of its ``status`` mapping equal
    "syncing up". Afterwards sleeps a further ``15 + 15 * int(role)`` seconds.

    Args:
        role: This party's role as a string holding an integer.
    """
    update_firestore("update_firestore::status=syncing up")
    update_firestore("update_firestore::task=Syncing up machines")
    print("Begin syncing up")

    # Keep polling while at least one participant has a different status.
    while any(status != "syncing up" for status in get_doc_ref_dict()["status"].values()):
        print("Waiting for all participants to sync up...")
        time.sleep(5)

    time.sleep(15 + 15 * int(role))
    print("Finished syncing up")

207 

208 

def start_datasharing(role: str, demo: bool) -> None:
    """Run the data sharing step of the protocol.

    In demo mode this runs ``run_example_datasharing.sh``. Otherwise it runs
    ``bin/DataSharingClient`` with this role and its parameter file, adding
    the ``../test_data/`` argument for every role except "0".

    Args:
        role: This party's role as a string.
        demo: Whether to run the bundled example instead of the real client.
    """
    update_firestore("update_firestore::task=Performing data sharing protocol")
    print("\n\n starting data sharing protocol \n\n")

    code_dir = f"{constants.EXECUTABLES_PREFIX}secure-gwas/code"
    if not demo:
        data_dir_arg = "" if role == "0" else " ../test_data/"
        command = (
            f"export PYTHONUNBUFFERED=TRUE && cd {code_dir} && "
            f"bin/DataSharingClient '{role}' ../par/test.par.'{role}'.txt{data_dir_arg}"
        )
    else:
        command = f"cd {code_dir} && bash run_example_datasharing.sh"

    run_command(command, fail_message="Failed MPC-GWAS data sharing protocol")
    print("\n\n Finished data sharing protocol\n\n")

220 

221 

def start_gwas(role: str, demo: bool) -> None:
    """Run the GWAS step, then post-process or mark the protocol finished.

    Sleeps ``100 + 30 * int(role)`` seconds before starting. In demo mode
    this runs ``run_example_gwas.sh``; otherwise ``bin/GwasClient`` with this
    role and its parameter file. Afterwards role "0" only publishes the
    finished status, while every other role goes on to
    ``process_output_files``.

    Args:
        role: This party's role as a string holding an integer.
        demo: Whether to run the bundled example instead of the real client.
    """
    update_firestore("update_firestore::task=Performing GWAS protocol")
    print("Sleeping before starting GWAS")
    time.sleep(100 + 30 * int(role))

    print("\n\n starting GWAS \n\n")
    update_firestore("update_firestore::status=starting GWAS")

    code_dir = f"{constants.EXECUTABLES_PREFIX}secure-gwas/code"
    if not demo:
        command = (
            f"export PYTHONUNBUFFERED=TRUE && cd {code_dir} && "
            f"bin/GwasClient '{role}' ../par/test.par.'{role}'.txt"
        )
    else:
        command = f"cd {code_dir} && bash run_example_gwas.sh"

    run_command(command, fail_message="Failed MPC-GWAS protocol")
    print("\n\n Finished GWAS \n\n")

    if role == "0":
        update_firestore("update_firestore::status=Finished protocol!")
        return

    process_output_files(role, demo)

239 

240 

def process_output_files(role: str, demo: bool) -> None:
    """Post-process, plot, and distribute the GWAS results.

    Runs ``postprocess_assoc`` and ``plot_assoc`` on the files under
    ``secure-gwas/out``, copies that folder with ``copy_to_out_folder``,
    optionally copies it to cloud storage (when this user has a non-empty
    ``RESULTS_PATH``) and optionally sends ``new_assoc.txt`` and
    ``manhattan.png`` to the website (when ``SEND_RESULTS`` is "Yes").
    Finally publishes the finished status.

    Args:
        role: This party's role; its integer value indexes the participants.
        demo: When True, the total number of individuals is fixed at 1000
            instead of being summed from the participants' ``NUM_INDS``.
    """
    study = get_doc_ref_dict()
    if demo:
        num_inds_total = 1_000
    else:
        num_inds_total = sum(
            int(study["personal_parameters"][participant]["NUM_INDS"]["value"])
            for participant in study["participants"]
        )
    num_covs = int(study["parameters"]["NUM_COVS"]["value"])

    gwas_dir = f"{constants.EXECUTABLES_PREFIX}secure-gwas"
    out_dir = f"{gwas_dir}/out"
    assoc_path = f"{out_dir}/new_assoc.txt"
    manhattan_path = f"{out_dir}/manhattan.png"

    postprocess_assoc(
        assoc_path,
        f"{out_dir}/test_assoc.txt",
        f"{gwas_dir}/test_data/pos.txt",
        f"{out_dir}/test_gkeep1.txt",
        f"{out_dir}/test_gkeep2.txt",
        num_inds_total,
        num_covs,
    )
    plot_assoc(manhattan_path, assoc_path)

    # The study document is fetched again before the per-user settings are read.
    study = get_doc_ref_dict()
    user_id: str = study["participants"][int(role)]

    copy_to_out_folder([out_dir])

    personal = study["personal_parameters"][user_id]

    results_path = personal.get("RESULTS_PATH", {}).get("value", "")
    if results_path:
        copy_results_to_cloud_storage(role, results_path, out_dir)

    if personal.get("SEND_RESULTS", {}).get("value") == "Yes":
        with open(assoc_path, "r") as file:
            website_send_file(file, "new_assoc.txt")

        with open(manhattan_path, "rb") as file:
            website_send_file(file, "manhattan.png")

    update_firestore("update_firestore::status=Finished protocol!")