Coverage for sfkit/utils/helper_functions.py: 100%
86 statements
coverage.py v7.2.7, created at 2023-08-07 15:11 -0400

import os
import select
import shutil
import subprocess

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from google.cloud import storage
from qmplot import manhattanplot
from scipy.stats import chi2

from sfkit.api import update_firestore
from sfkit.utils import constants


def authenticate_user() -> None:
    if not os.path.exists(constants.AUTH_KEY):
        print("You have not authenticated. Please run 'sfkit auth' to authenticate.")
        exit(1)


def run_command(command: str, fail_message: str = "") -> None:
    # Run a shell command, streaming its stdout/stderr to the console as it runs
    with subprocess.Popen(
        command, shell=True, executable="/bin/bash", stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    ) as proc:
        while proc.poll() is None:
            readable, _, _ = select.select([proc.stdout, proc.stderr], [], [])

            for stream in readable:
                if line := stream.readline().rstrip():
                    print(line)

        res = proc.returncode

    if res != 0:
        print(f"FAILED - {command}")
        print(f"Return code: {res}")
        condition_or_fail(False, fail_message)
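

# Illustrative usage sketch (not part of the original module): the command and
# failure message below are hypothetical. run_command streams the command's
# output line by line and, on a non-zero exit code, reports the failure via
# condition_or_fail.
def _example_run_command_usage() -> None:
    run_command("ls -l /tmp", fail_message="Could not list the temporary directory")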


def condition_or_fail(condition: bool, message: str = "The sfkit process has failed.") -> None:
    if not condition:
        message = f"FAILED - {message}"
        print(message)
        update_firestore(f"update_firestore::status={message}")
        exit(0)  # 0 so that the wrapper doesn't override the status with a more generic error
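

# Illustrative usage sketch (not part of the original module): guard a
# precondition so a clear status reaches Firestore before exiting; the
# checked path is hypothetical.
def _example_condition_or_fail_usage() -> None:
    condition_or_fail(os.path.exists("/tmp/geno.txt"), "Genotype file is missing")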


def postprocess_assoc(
    new_assoc_file: str,
    assoc_file: str,
    pos_file: str,
    gkeep1_file: str,
    gkeep2_file: str,
    num_ind_total: int,
    num_cov: int,
) -> None:
    # new_assoc_file: Name of new assoc file
    # assoc_file: Name of original assoc file
    # pos_file: Path to pos.txt
    # gkeep1_file: Path to gkeep1.txt
    # gkeep2_file: Path to gkeep2.txt
    # num_ind_total: Total number of individuals
    # num_cov: Number of covariates

    # Combine filters
    gkeep1 = np.loadtxt(gkeep1_file, dtype=bool)
    if gkeep2_file != "":
        gkeep2 = np.loadtxt(gkeep2_file, dtype=bool)
        gkeep1[gkeep1] = gkeep2

    # Load and check dimension of output association stats
    assoc = np.loadtxt(assoc_file)
    assert len(assoc) == gkeep1.sum()

    # Calculate p-values
    t2 = (assoc**2) * (num_ind_total - num_cov) / (1 - assoc**2 + 1e-10)
    log10p = np.log10(chi2.sf(t2, df=1))

    # Append SNP position information and write to a new file
    lineno = 0
    assoc_idx = 0

    with open(new_assoc_file, "w") as out:
        out.write("\t".join(["#CHROM", "POS", "R", "LOG10P"]) + "\n")

        for line in open(pos_file):
            pos = line.strip().split()

            if gkeep1[lineno]:
                out.write(pos[0] + "\t" + pos[1] + "\t" + str(assoc[assoc_idx]) + "\t" + str(log10p[assoc_idx]) + "\n")
                assoc_idx += 1

            lineno += 1
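

# Illustrative sketch (not part of the original module) of the p-value
# conversion used in postprocess_assoc: each association statistic r becomes a
# 1-degree-of-freedom chi-square statistic t2 = r^2 * (N - C) / (1 - r^2), and
# the chi-square survival function gives the p-value. The numbers are made up.
def _example_assoc_to_log10p(r: float = 0.05, num_ind_total: int = 2000, num_cov: int = 10) -> float:
    t2 = (r**2) * (num_ind_total - num_cov) / (1 - r**2 + 1e-10)
    return float(np.log10(chi2.sf(t2, df=1)))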


def plot_assoc(plot_file: str, new_assoc_file: str) -> None:
    # Load postprocessed assoc file and convert p-values
    tab = pd.read_table(new_assoc_file)
    tab["P"] = 10 ** tab["LOG10P"]

    # Create a Manhattan plot
    plt.figure()
    manhattanplot(
        data=tab,
        suggestiveline=None,  # type: ignore
        genomewideline=None,  # type: ignore
        marker=".",
        xticklabel_kws={"rotation": "vertical"},  # rotate x-axis tick labels; "vertical" or any angle in degrees
    )
    plt.savefig(plot_file)
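

# Illustrative usage sketch (not part of the original module): postprocess an
# association file and render the Manhattan plot. All file names below are
# hypothetical, and the empty string skips the optional second filter file.
def _example_plot_assoc_usage() -> None:
    postprocess_assoc("new_assoc.txt", "assoc.txt", "pos.txt", "gkeep1.txt", "", 2000, 10)
    plot_assoc("manhattan.png", "new_assoc.txt")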


def copy_results_to_cloud_storage(role: str, data_path: str, output_directory: str) -> None:
    os.makedirs(output_directory, exist_ok=True)
    # For sfgwas runs, include the Qpc.txt file from the sfgwas cache with the results
    if "sfgwas" in output_directory:
        shutil.copyfile(
            f"{constants.EXECUTABLES_PREFIX}sfgwas/cache/party{role}/Qpc.txt", f"{output_directory}/Qpc.txt"
        )

    try:
        # data_path has the form "<bucket-name>/<prefix>"
        storage_client = storage.Client()
        bucket_name, prefix = data_path.split("/", 1)
        bucket = storage_client.bucket(bucket_name)
        for file in os.listdir(output_directory):
            blob = bucket.blob(f"{prefix}/out/party{role}/{file}")
            blob.upload_from_filename(f"{output_directory}/{file}")
        print(f"Successfully uploaded results from {output_directory} to gs://{data_path}/out/party{role}")
    except Exception as e:
        print("Failed to upload results to cloud storage")
        print(e)
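

# Illustrative usage sketch (not part of the original module): data_path is
# expected to be "<bucket-name>/<prefix>" without a "gs://" scheme; the bucket,
# prefix, and output directory below are hypothetical.
def _example_copy_results_usage() -> None:
    copy_results_to_cloud_storage(role="1", data_path="my-study-bucket/studies/demo", output_directory="/tmp/out")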


def copy_to_out_folder(relevant_paths: list) -> None:
    """
    Overwrite the contents of the out folder with the files/folders in relevant_paths
    """
    if not os.path.exists(constants.OUT_FOLDER):
        os.makedirs(constants.OUT_FOLDER)

    for path in relevant_paths:
        if os.path.exists(path):
            destination = f"{constants.OUT_FOLDER}/{os.path.basename(path)}"
            if os.path.isfile(path):
                shutil.copy2(path, destination)
            elif os.path.isdir(path):
                if os.path.exists(destination):
                    shutil.rmtree(destination)
                shutil.copytree(path, destination)
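

# Illustrative usage sketch (not part of the original module): mirror a results
# file and a cache directory (both paths hypothetical) into constants.OUT_FOLDER.
def _example_copy_to_out_folder_usage() -> None:
    copy_to_out_folder(["/tmp/assoc.txt", "/tmp/cache"])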