Coverage for sfkit/utils/sfgwas_helper_functions.py: 100%

115 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-07 15:59 -0400

1import os 

2import re 

3import select 

4import shutil 

5import subprocess 

6from typing import Tuple, Union 

7 

8import matplotlib.pyplot as plt 

9import numpy as np 

10import requests 

11from bs4 import BeautifulSoup 

12 

13from sfkit.api import get_doc_ref_dict, update_firestore, website_send_file 

14from sfkit.utils import constants 

15from sfkit.utils.helper_functions import ( 

16 condition_or_fail, 

17 copy_results_to_cloud_storage, 

18 copy_to_out_folder, 

19 plot_assoc, 

20 postprocess_assoc, 

21) 

22 

23 

def get_file_paths() -> Tuple[str, str]:
    """Return the two paths recorded in sfkit's ``data_path.txt``.

    The file is looked up inside ``constants.SFKIT_DIR``. Its first line is
    returned as the genotype file prefix and its second line as the data
    path; trailing whitespace (including the newline) is stripped from both.

    Returns:
        A ``(geno_file_prefix, data_path)`` tuple. A line that is absent from
        the file comes back as an empty string.
    """
    data_path_file = os.path.join(constants.SFKIT_DIR, "data_path.txt")
    with open(data_path_file, "r") as handle:
        first_line = handle.readline()
        second_line = handle.readline()
    return first_line.rstrip(), second_line.rstrip()

29 

30 

def use_existing_config(role: str, doc_ref_dict: dict) -> None:
    """Put a pre-built "blocks" configuration in place for sfgwas.

    Args:
        role: Party index as a string. Every role except ``"0"`` also gets
            its ``for_sfgwas`` data directory moved next to the executable.
        doc_ref_dict: Study document; its ``"description"`` value must
            contain ``constants.BLOCKS_MODE`` followed by the name of the
            config directory to use.

    Raises:
        IndexError: If the description does not contain
            ``constants.BLOCKS_MODE``.
    """
    print("Using blocks with config files")

    if role != "0":
        # Only the data path (second entry) is needed here.
        data_path = get_file_paths()[1]
        move(
            f"{data_path}/p{role}/for_sfgwas",
            f"{constants.EXECUTABLES_PREFIX}sfgwas/for_sfgwas",
        )

    # Everything after the BLOCKS_MODE marker names the config directory.
    block_name = doc_ref_dict["description"].split(constants.BLOCKS_MODE)[1]
    move(
        f"{constants.EXECUTABLES_PREFIX}sfgwas/config/blocks/{block_name}",
        f"{constants.EXECUTABLES_PREFIX}sfgwas/config/gwas",
    )

45 

46 

def move(source: str, destination: str) -> None:
    """Replace ``destination`` with ``source``.

    Any existing directory tree at ``destination`` is removed, then
    ``source`` is moved to that path.

    Args:
        source: Path of the file or directory to move.
        destination: Path that ``source`` should end up at.

    Raises:
        FileNotFoundError: If ``source`` does not exist. The check happens
            before anything is deleted, so ``destination`` is left intact.
    """
    print(f"Moving {source} to {destination}...")

    # Verify the source first: removing the destination and only then
    # discovering the source is missing would destroy data for nothing.
    # lexists (not exists) so that a dangling symlink can still be moved.
    if not os.path.lexists(source):
        raise FileNotFoundError(f"Cannot move {source}: no such file or directory")

    # ignore_errors keeps this a no-op when the destination does not exist.
    shutil.rmtree(destination, ignore_errors=True)
    shutil.move(source, destination)

51 

52 

def run_sfgwas_with_task_updates(command: str, protocol: str, demo: bool, role: str) -> None:
    """Run ``command`` under bash, echoing its output and reporting progress.

    Each line read from the child's stdout/stderr is printed. Lines that
    contain ``constants.SFKIT_PREFIX`` are forwarded to ``update_firestore``
    as task updates, and every line is passed to ``check_for_failure``.

    The child is killed if neither stream becomes readable within the
    current timeout. The timeout starts at 24 hours (expiry is reported as
    a failure) and drops to 30 seconds once a line announcing that results
    were saved has been seen.

    Args:
        command: Shell command line to execute.
        protocol: ``"SF-GWAS"`` or ``"PCA"``; selects the ``PROTOCOL``
            environment variable given to the child (left untouched for
            any other value).
        demo: Not used by this function.
        role: Party index, used to recognise the PCA "saved" message.
    """
    stall_limit = 86_400  # 24 hours, in seconds
    wrap_up_limit = 30  # seconds allowed once results have been written

    child_env = os.environ.copy()
    protocol_value = {"SF-GWAS": "gwas", "PCA": "pca"}.get(protocol)
    if protocol_value is not None:
        child_env["PROTOCOL"] = protocol_value

    # NOTE(review): command is handed to bash as a single shell string, so
    # it must never be assembled from untrusted input.
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
        executable="/bin/bash",
        env=child_env,
    )

    timeout = stall_limit
    while process.poll() is None:
        ready, _, _ = select.select([process.stdout, process.stderr], [], [], timeout)

        if not ready:
            # Nothing readable within the timeout: stop the child.
            process.kill()
            if timeout == stall_limit:
                print("WARNING: sfgwas has been stalling for 24 hours. Killing process.")
                condition_or_fail(False, f"{protocol} protocol has been stalling for 24 hours. Killing process.")
            # NOTE(review): assumes this return applies to both timeouts
            # (a quiet exit after the 30 s wrap-up window) - confirm against
            # the upstream file.
            return

        for stream in ready:
            line = stream.readline().decode("utf-8").strip()
            print(line)
            if constants.SFKIT_PREFIX in line:
                task = line.split(constants.SFKIT_PREFIX)[1]
                update_firestore(f"update_firestore::task={task}")
            elif "Output collectively decrypted and saved to" in line or (
                protocol == "PCA" and f"Saved data to cache/party{role}/Qpc.txt" in line
            ):
                # Results are on disk; only wait briefly from now on.
                timeout = wrap_up_limit

            check_for_failure(command, protocol, process, stream, line)

    process.wait()

93 

94 

def check_for_failure(command: str, protocol: str, process: subprocess.Popen, stream: list, line: str) -> None:
    """Report a protocol failure when ``line`` is a real error from stderr.

    A line counts as a failure only if it was read from ``process.stderr``,
    is non-empty, and is not one of the known harmless messages (lines
    starting with ``"W :"``, or containing ``"[watchdog] gc finished"`` or
    ``"warning:"``).

    Args:
        command: Command that was run; printed when a failure is reported.
        protocol: Protocol name used in the failure message.
        process: The running child process.
        stream: The stream ``line`` was read from (compared against
            ``process.stderr``).
        line: The decoded, stripped output line.
    """
    # Output on any other stream, or an empty line, is never a failure.
    if not (stream == process.stderr and line):
        return

    # Known noise that the child writes to stderr without failing.
    if line.startswith("W :"):
        return
    if "[watchdog] gc finished" in line:
        return
    if "warning:" in line:
        return

    print(f"FAILED - {command}")
    print(f"Stderr: {line}")
    condition_or_fail(False, f"Failed {protocol} protocol")

106 

107 

def post_process_results(role: str, demo: bool, protocol: str) -> None:
    """Build plots, copy outputs and optionally upload results for a party.

    Steps, in order: create the protocol-specific plot, copy the party's
    output directory to cloud storage when a ``RESULTS_PATH`` is configured,
    copy the relevant files to the out folder, upload the result files to
    the website when ``SEND_RESULTS`` is ``"Yes"``, and finally mark the
    protocol as finished in Firestore.

    Args:
        role: Party index as a string; also the index into the study's
            ``"participants"`` list.
        demo: Forwarded to ``make_new_assoc_and_manhattan_plot``.
        protocol: ``"SF-GWAS"`` or ``"PCA"``.
    """
    doc_ref_dict: dict = get_doc_ref_dict()
    user_id: str = doc_ref_dict["participants"][int(role)]

    if protocol == "SF-GWAS":
        make_new_assoc_and_manhattan_plot(doc_ref_dict, demo, role)
    elif protocol == "PCA":
        make_pca_plot(role)

    personal_parameters = doc_ref_dict["personal_parameters"][user_id]
    results_path = personal_parameters.get("RESULTS_PATH", {}).get("value", "")

    sfgwas_dir = f"{constants.EXECUTABLES_PREFIX}sfgwas"
    out_dir = f"{sfgwas_dir}/out/party{role}"
    qpc_path = f"{sfgwas_dir}/cache/party{role}/Qpc.txt"

    if results_path:
        copy_results_to_cloud_storage(role, results_path, out_dir)

    copy_to_out_folder([out_dir, qpc_path, f"{sfgwas_dir}/stdout_party{role}.txt"])

    send_results: str = personal_parameters.get("SEND_RESULTS", {}).get("value")

    # (path, open mode, name on the website) for every file to upload.
    uploads = []
    if send_results == "Yes":
        if protocol == "SF-GWAS":
            uploads = [
                (f"{out_dir}/new_assoc.txt", "r", "new_assoc.txt"),
                (f"{out_dir}/manhattan.png", "rb", "manhattan.png"),
            ]
        elif protocol == "PCA":
            uploads = [
                (qpc_path, "r", "Qpc.txt"),
                (f"{out_dir}/pca_plot.png", "rb", "pca_plot.png"),
            ]

    for path, mode, upload_name in uploads:
        with open(path, mode) as f:
            website_send_file(f, upload_name)

    update_firestore("update_firestore::status=Finished protocol!")

142 

143 

def make_pca_plot(role: str) -> None:
    """Save a PC1-vs-PC2 scatter plot for the given party.

    Loads the comma-delimited ``Qpc.txt`` from the party's cache directory,
    scatters entry 0 of the loaded array against entry 1, and writes the
    figure to ``pca_plot.png`` in the party's output directory. Drawing goes
    through pyplot's module-level functions.

    Args:
        role: Party index used to build the ``party{role}`` paths.
    """
    sfgwas_dir = f"{constants.EXECUTABLES_PREFIX}sfgwas"
    qpc_path = f"{sfgwas_dir}/cache/party{role}/Qpc.txt"
    plot_path = f"{sfgwas_dir}/out/party{role}/pca_plot.png"

    # Assumes Qpc.txt stores one principal component per row - TODO confirm.
    components = np.loadtxt(qpc_path, delimiter=",")
    first_pc = components[0]
    second_pc = components[1]

    plt.scatter(first_pc, second_pc)
    plt.xlabel("PC1")
    plt.ylabel("PC2")
    plt.savefig(plot_path)

150 

151 

def make_new_assoc_and_manhattan_plot(doc_ref_dict: dict, demo: bool, role: str) -> None:
    """Post-process the association results and draw the Manhattan plot.

    Calls ``postprocess_assoc`` to produce ``new_assoc.txt`` in the party's
    output directory, then ``plot_assoc`` to render ``manhattan.png`` from
    that file.

    Args:
        doc_ref_dict: Study document. Supplies ``num_covs`` and, outside
            demo mode, each participant's ``NUM_INDS``.
        demo: When true, 2000 total individuals and the bundled example
            ``snp_pos.txt`` are used instead of the study's own values.
        role: Party index used to build the ``party{role}`` paths.
    """
    # sourcery skip: assign-if-exp, introduce-default-else, swap-if-expression
    num_inds_total = 2000
    if not demo:
        # Total sample size is the sum over every participant's NUM_INDS.
        num_inds_total = 0
        for user in doc_ref_dict["participants"]:
            num_inds_total += int(doc_ref_dict["personal_parameters"][user]["NUM_INDS"]["value"])
    num_covs = int(doc_ref_dict["parameters"]["num_covs"]["value"])

    snp_pos_path = f"{constants.EXECUTABLES_PREFIX}sfgwas/example_data/party{role}/snp_pos.txt"
    if not demo:
        data_path_file = os.path.join(constants.SFKIT_DIR, "data_path.txt")
        with open(data_path_file, "r") as handle:
            handle.readline()  # first line is not needed here
            data_path = handle.readline().rstrip()
        snp_pos_path = f"{constants.EXECUTABLES_PREFIX}{data_path}/snp_pos.txt"

    out_dir = f"{constants.EXECUTABLES_PREFIX}sfgwas/out/party{role}"
    cache_dir = f"{constants.EXECUTABLES_PREFIX}sfgwas/cache/party{role}"
    new_assoc_path = f"{out_dir}/new_assoc.txt"

    postprocess_assoc(
        new_assoc_path,
        f"{out_dir}/assoc.txt",
        snp_pos_path,
        f"{cache_dir}/gkeep.txt",
        "",
        num_inds_total,
        num_covs,
    )
    plot_assoc(f"{out_dir}/manhattan.png", new_assoc_path)

182 

183 

def to_float_int_or_bool(string: str) -> Union[float, int, bool, str]:
    """Convert ``string`` to the most specific simple type it represents.

    Conversion is attempted in this order: boolean (``"true"``/``"false"``,
    case-insensitive), ``int``, ``float``. If none applies, the original
    string is returned unchanged.

    Args:
        string: Text to convert.

    Returns:
        The parsed ``bool``, ``int`` or ``float``, or ``string`` itself.
    """
    lowered = string.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False

    # int is tried before float so that "42" stays an integer.
    for convert in (int, float):
        try:
            return convert(string)
        except ValueError:
            continue
    return string

193 return string