Coverage for sfkit/utils/sfgwas_helper_functions.py: 100%
115 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-07 15:59 -0400
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-07 15:59 -0400
1import os
2import re
3import select
4import shutil
5import subprocess
6from typing import Tuple, Union
8import matplotlib.pyplot as plt
9import numpy as np
10import requests
11from bs4 import BeautifulSoup
13from sfkit.api import get_doc_ref_dict, update_firestore, website_send_file
14from sfkit.utils import constants
15from sfkit.utils.helper_functions import (
16 condition_or_fail,
17 copy_results_to_cloud_storage,
18 copy_to_out_folder,
19 plot_assoc,
20 postprocess_assoc,
21)
def get_file_paths() -> Tuple[str, str]:
    """Read the genotype file prefix and the data path from ``data_path.txt``.

    The file is looked up inside ``constants.SFKIT_DIR``. Its first line is
    returned as the genotype file prefix and its second line as the data
    path; trailing whitespace (including the newline) is stripped from both.

    Returns:
        A ``(geno_file_prefix, data_path)`` tuple of strings.
    """
    path_file = os.path.join(constants.SFKIT_DIR, "data_path.txt")
    with open(path_file, "r") as handle:
        first_line = handle.readline()
        second_line = handle.readline()
    return first_line.rstrip(), second_line.rstrip()
def use_existing_config(role: str, doc_ref_dict: dict) -> None:
    """Move pre-made "blocks" inputs and config into the sfgwas directory.

    For every role other than ``"0"``, the party's ``for_sfgwas`` folder under
    the recorded data path replaces ``sfgwas/for_sfgwas``. For all roles, the
    config folder named after ``constants.BLOCKS_MODE`` in the study
    description replaces ``sfgwas/config/gwas``.

    Args:
        role: Party index as a string.
        doc_ref_dict: Study document; ``doc_ref_dict["description"]`` must
            contain ``constants.BLOCKS_MODE`` followed by the config name.
    """
    print("Using blocks with config files")
    sfgwas_root = f"{constants.EXECUTABLES_PREFIX}sfgwas"

    if role != "0":
        # Only the second entry (the data path) of the recorded paths is needed.
        data_path = get_file_paths()[1]
        move(f"{data_path}/p{role}/for_sfgwas", f"{sfgwas_root}/for_sfgwas")

    # Whatever follows the BLOCKS_MODE marker in the description names the config.
    config = doc_ref_dict["description"].split(constants.BLOCKS_MODE)[1]
    move(f"{sfgwas_root}/config/blocks/{config}", f"{sfgwas_root}/config/gwas")
def move(source: str, destination: str) -> None:
    """Replace ``destination`` with ``source``.

    Any directory tree already at ``destination`` is removed first so that
    ``shutil.move`` renames ``source`` onto ``destination`` instead of nesting
    it inside an existing directory.

    Args:
        source: Path of the file or directory to move.
        destination: Path that ``source`` should end up at.

    Raises:
        FileNotFoundError: If ``source`` does not exist. This is checked
            before anything is deleted, so a bad ``source`` never costs the
            caller the existing ``destination``.
    """
    print(f"Moving {source} to {destination}...")
    # lexists (not exists) so that a dangling symlink is still a movable source.
    if not os.path.lexists(source):
        raise FileNotFoundError(f"Cannot move {source}: no such file or directory")
    shutil.rmtree(destination, ignore_errors=True)
    shutil.move(source, destination)
def run_sfgwas_with_task_updates(command: str, protocol: str, demo: bool, role: str) -> None:
    """Run ``command`` under bash and relay its output as task updates.

    The child's stdout and stderr are piped and read line by line. Every line
    is printed; a line containing ``constants.SFKIT_PREFIX`` has the text
    after the prefix sent to ``update_firestore`` as a task update, and every
    line is passed to ``check_for_failure``.

    If neither pipe becomes readable within the current timeout the child is
    killed. Before the results have been reported as saved, that is treated
    as a stall and reported through ``condition_or_fail``; afterwards it is
    treated as a normal (quiet) end of the run.

    Args:
        command: Shell command line, executed with ``/bin/bash``.
        protocol: ``"SF-GWAS"`` or ``"PCA"``; sets the ``PROTOCOL`` environment
            variable of the child to ``gwas`` / ``pca`` respectively.
        demo: Unused here; kept for interface compatibility.
        role: Party index as a string, used to recognise the PCA output line.
    """
    stall_timeout = 86_400  # seconds (24 hours) of silence tolerated while running
    wrap_up_timeout = 30  # seconds of silence tolerated once results are saved

    env = os.environ.copy()
    if protocol == "SF-GWAS":
        env["PROTOCOL"] = "gwas"
    elif protocol == "PCA":
        env["PROTOCOL"] = "pca"

    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
        executable="/bin/bash",
        env=env,
    )

    timeout = stall_timeout
    while process.poll() is None:
        rlist, _, _ = select.select([process.stdout, process.stderr], [], [], timeout)

        if not rlist:
            process.kill()
            # Reap the killed child so it does not linger as a zombie; this must
            # happen before condition_or_fail, which may not return.
            process.wait()
            if timeout == stall_timeout:
                print("WARNING: sfgwas has been stalling for 24 hours. Killing process.")
                condition_or_fail(False, f"{protocol} protocol has been stalling for 24 hours. Killing process.")
            return

        for stream in rlist:
            line = stream.readline().decode("utf-8").strip()
            print(line)
            if constants.SFKIT_PREFIX in line:
                update_firestore(f"update_firestore::task={line.split(constants.SFKIT_PREFIX)[1]}")
            elif "Output collectively decrypted and saved to" in line or (
                protocol == "PCA" and f"Saved data to cache/party{role}/Qpc.txt" in line
            ):
                # Results are on disk; from here on a short silence means "done".
                timeout = wrap_up_timeout

            check_for_failure(command, protocol, process, stream, line)

    process.wait()
def check_for_failure(command: str, protocol: str, process: subprocess.Popen, stream: list, line: str) -> None:
    """Report a protocol failure when ``line`` is a real error from stderr.

    A line counts as an error only if it was read from ``process.stderr``, is
    non-empty, and is not one of the known-benign messages: lines starting
    with ``"W :"``, lines containing ``"[watchdog] gc finished"``, or lines
    containing ``"warning:"``. In that case the command and the line are
    printed and ``condition_or_fail`` is invoked with a failure message.

    Args:
        command: The command that was run (only used in the printed message).
        protocol: Protocol name used in the failure message.
        process: The running child; only its ``stderr`` attribute is consulted.
        stream: The stream ``line`` was read from.
        line: The decoded, stripped output line.
    """
    # Only non-empty stderr output is ever considered.
    if not (stream == process.stderr and line):
        return

    is_benign = (
        line.startswith("W :")
        or "[watchdog] gc finished" in line
        or "warning:" in line
    )
    if is_benign:
        return

    print(f"FAILED - {command}")
    print(f"Stderr: {line}")
    condition_or_fail(False, f"Failed {protocol} protocol")
def post_process_results(role: str, demo: bool, protocol: str) -> None:
    """Plot, export and optionally upload this party's results, then mark the study finished.

    Steps, in order:
      1. Build the protocol-specific plot (Manhattan plot for ``"SF-GWAS"``,
         PCA scatter plot for ``"PCA"``).
      2. If the participant configured a ``RESULTS_PATH``, copy the party's
         ``out`` folder to cloud storage.
      3. Copy the ``out`` folder, ``Qpc.txt`` and the party's stdout log via
         ``copy_to_out_folder``.
      4. If the participant set ``SEND_RESULTS`` to ``"Yes"``, send the
         protocol's result file and plot through ``website_send_file``.
      5. Push the "Finished protocol!" status.

    Args:
        role: Party index as a string; also indexes ``participants``.
        demo: Forwarded to the SF-GWAS post-processing.
        protocol: ``"SF-GWAS"`` or ``"PCA"``.
    """
    doc_ref_dict: dict = get_doc_ref_dict()
    user_id: str = doc_ref_dict["participants"][int(role)]

    if protocol == "SF-GWAS":
        make_new_assoc_and_manhattan_plot(doc_ref_dict, demo, role)
    elif protocol == "PCA":
        make_pca_plot(role)

    sfgwas_dir = f"{constants.EXECUTABLES_PREFIX}sfgwas"
    out_dir = f"{sfgwas_dir}/out/party{role}"
    qpc_file = f"{sfgwas_dir}/cache/party{role}/Qpc.txt"

    results_path = doc_ref_dict["personal_parameters"][user_id].get("RESULTS_PATH", {}).get("value", "")
    if results_path:
        copy_results_to_cloud_storage(role, results_path, out_dir)

    copy_to_out_folder([out_dir, qpc_file, f"{sfgwas_dir}/stdout_party{role}.txt"])

    send_results: str = doc_ref_dict["personal_parameters"][user_id].get("SEND_RESULTS", {}).get("value")

    # (path, open mode, name shown on the website) for each file to upload.
    uploads = ()
    if send_results == "Yes":
        if protocol == "SF-GWAS":
            uploads = (
                (f"{out_dir}/new_assoc.txt", "r", "new_assoc.txt"),
                (f"{out_dir}/manhattan.png", "rb", "manhattan.png"),
            )
        elif protocol == "PCA":
            uploads = (
                (qpc_file, "r", "Qpc.txt"),
                (f"{out_dir}/pca_plot.png", "rb", "pca_plot.png"),
            )

    for path, mode, display_name in uploads:
        with open(path, mode) as f:
            website_send_file(f, display_name)

    update_firestore("update_firestore::status=Finished protocol!")
def make_pca_plot(role: str) -> None:
    """Save a PC1-vs-PC2 scatter plot for this party as ``pca_plot.png``.

    Loads the comma-delimited ``cache/party{role}/Qpc.txt`` and plots the
    array's entry at index 0 against its entry at index 1 (presumably one row
    per principal component -- confirm against the sfgwas output format).
    The figure is written to ``out/party{role}/pca_plot.png``.

    Args:
        role: Party index as a string.
    """
    sfgwas_dir = f"{constants.EXECUTABLES_PREFIX}sfgwas"
    components = np.loadtxt(f"{sfgwas_dir}/cache/party{role}/Qpc.txt", delimiter=",")

    first_pc, second_pc = components[0], components[1]
    plt.scatter(first_pc, second_pc)
    plt.xlabel("PC1")
    plt.ylabel("PC2")
    plt.savefig(f"{sfgwas_dir}/out/party{role}/pca_plot.png")
def make_new_assoc_and_manhattan_plot(doc_ref_dict: dict, demo: bool, role: str) -> None:
    """Post-process the association results and draw the Manhattan plot.

    Runs ``postprocess_assoc`` to produce ``out/party{role}/new_assoc.txt``
    from ``assoc.txt``, the SNP positions file and ``gkeep.txt``, then runs
    ``plot_assoc`` to render ``out/party{role}/manhattan.png``.

    Args:
        doc_ref_dict: Study document. ``parameters.num_covs.value`` is always
            read; for non-demo runs each participant's ``NUM_INDS`` value is
            read as well.
        demo: When true, a fixed total of 2000 individuals and the bundled
            ``example_data`` SNP positions are used. Otherwise the total is
            summed over all participants and the SNP positions are taken
            from the data path recorded in ``data_path.txt``.
        role: Party index as a string.
    """
    sfgwas_dir = f"{constants.EXECUTABLES_PREFIX}sfgwas"
    party_out = f"{sfgwas_dir}/out/party{role}"

    if demo:
        num_inds_total = 2000
    else:
        num_inds_total = sum(
            int(doc_ref_dict["personal_parameters"][user]["NUM_INDS"]["value"])
            for user in doc_ref_dict["participants"]
        )
    num_covs = int(doc_ref_dict["parameters"]["num_covs"]["value"])

    if demo:
        snp_pos_path = f"{sfgwas_dir}/example_data/party{role}/snp_pos.txt"
    else:
        # Reuse the shared reader instead of re-parsing data_path.txt here.
        _, data_path = get_file_paths()
        snp_pos_path = f"{constants.EXECUTABLES_PREFIX}{data_path}/snp_pos.txt"

    postprocess_assoc(
        f"{party_out}/new_assoc.txt",
        f"{party_out}/assoc.txt",
        snp_pos_path,
        f"{sfgwas_dir}/cache/party{role}/gkeep.txt",
        "",
        num_inds_total,
        num_covs,
    )
    plot_assoc(
        f"{party_out}/manhattan.png",
        f"{party_out}/new_assoc.txt",
    )
def to_float_int_or_bool(string: str) -> Union[float, int, bool, str]:
    """Convert ``string`` to the most specific simple type it represents.

    Conversions are attempted in this order:
      1. ``"true"`` / ``"false"`` (case-insensitive) become ``True`` / ``False``.
      2. Anything ``int()`` accepts becomes an ``int``.
      3. Anything ``float()`` accepts becomes a ``float``.
      4. Otherwise the original string is returned unchanged.

    Args:
        string: The text to convert.

    Returns:
        The converted value, or ``string`` itself when no conversion applies.
    """
    lowered = string.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False

    # int is tried before float so that "42" stays an integer.
    for convert in (int, float):
        try:
            return convert(string)
        except ValueError:
            continue
    return string