Coverage for sfkit/utils/helper_functions.py: 100%

86 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-07 15:11 -0400

1import os 

2import select 

3import shutil 

4import subprocess 

5 

6import matplotlib.pyplot as plt 

7import numpy as np 

8import pandas as pd 

9from google.cloud import storage 

10from qmplot import manhattanplot 

11from scipy.stats import chi2 

12 

13from sfkit.api import update_firestore 

14from sfkit.utils import constants 

15 

16 

def authenticate_user() -> None:
    """Verify that the user has run 'sfkit auth'; exit with status 1 otherwise."""
    if os.path.exists(constants.AUTH_KEY):
        return
    print("You have not authenticated. Please run 'sfkit auth' to authenticate.")
    exit(1)

21 

22 

def run_command(command: str, fail_message: str = "") -> None:
    """Run *command* in a bash shell, streaming its stdout/stderr line by line.

    Args:
        command: Shell command line to execute (run via /bin/bash).
        fail_message: Message forwarded to condition_or_fail on a non-zero exit.
    """
    with subprocess.Popen(
        command, shell=True, executable="/bin/bash", stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    ) as proc:
        while proc.poll() is None:
            # Block until at least one of the two pipes has data so we don't
            # stall on an empty stream while the other one is producing output.
            readable, _, _ = select.select([proc.stdout, proc.stderr], [], [])

            for stream in readable:
                if line := stream.readline().rstrip():
                    print(line)

        # The poll() loop exits as soon as the process terminates, which can
        # leave final lines still buffered in the pipes — drain them here so
        # no output is lost.
        for stream in (proc.stdout, proc.stderr):
            if stream is not None:
                for raw in stream:
                    if line := raw.rstrip():
                        print(line)

        res = proc.returncode

    if res != 0:
        print(f"FAILED - {command}")
        print(f"Return code: {res}")
        condition_or_fail(False, fail_message)

40 

41 

def condition_or_fail(condition: bool, message: str = "The sfkit process has failed.") -> None:
    """If *condition* is falsy, report the failure to Firestore and exit.

    The failure status is printed locally and pushed via update_firestore;
    nothing happens when the condition holds.
    """
    if condition:
        return

    message = f"FAILED - {message}"
    print(message)
    update_firestore(f"update_firestore::status={message}")
    exit(0)  # 0 so that the wrapper doesn't override the status with a more generic error

48 

49 

def postprocess_assoc(
    new_assoc_file: str,
    assoc_file: str,
    pos_file: str,
    gkeep1_file: str,
    gkeep2_file: str,
    num_ind_total: int,
    num_cov: int,
) -> None:
    """Attach SNP positions and p-values to raw association statistics.

    Args:
        new_assoc_file: Name of new assoc file to write (tab-separated,
            with #CHROM/POS/R/LOG10P columns).
        assoc_file: Name of original assoc file (one statistic per kept SNP).
        pos_file: Path to pos.txt (one "CHROM POS" pair per line, all SNPs).
        gkeep1_file: Path to gkeep1.txt (0/1 SNP filter over all SNPs).
        gkeep2_file: Path to gkeep2.txt (0/1 filter over the SNPs kept by
            gkeep1); pass "" when there is no second filter.
        num_ind_total: Total number of individuals.
        num_cov: Number of covariates.
    """
    # Combine filters: gkeep2 is defined only over the SNPs that survived
    # gkeep1, so it refines gkeep1 in place.
    gkeep1 = np.loadtxt(gkeep1_file, dtype=bool)
    if gkeep2_file != "":
        gkeep2 = np.loadtxt(gkeep2_file, dtype=bool)
        gkeep1[gkeep1] = gkeep2

    # Load and check dimension of output association stats
    assoc = np.loadtxt(assoc_file)
    assert len(assoc) == gkeep1.sum()

    # Convert the statistics to chi-square values, then to log10 p-values.
    # The 1e-10 guards against division by zero when assoc**2 is ~1.
    t2 = (assoc**2) * (num_ind_total - num_cov) / (1 - assoc**2 + 1e-10)
    log10p = np.log10(chi2.sf(t2, df=1))

    # Append SNP position information and write to a new file.
    # (The original left pos_file open; the with-statement closes both files.)
    assoc_idx = 0

    with open(new_assoc_file, "w") as out, open(pos_file) as positions:
        out.write("\t".join(["#CHROM", "POS", "R", "LOG10P"]) + "\n")

        for lineno, line in enumerate(positions):
            pos = line.strip().split()

            if gkeep1[lineno]:
                out.write(pos[0] + "\t" + pos[1] + "\t" + str(assoc[assoc_idx]) + "\t" + str(log10p[assoc_idx]) + "\n")
                assoc_idx += 1

96 

97 

def plot_assoc(plot_file: str, new_assoc_file: str) -> None:
    """Render a Manhattan plot of the postprocessed association results.

    Args:
        plot_file: Path the figure image is saved to.
        new_assoc_file: Tab-separated file produced by postprocess_assoc,
            containing #CHROM, POS, R, and LOG10P columns.
    """
    # Load postprocessed assoc file and convert log10 p-values back to p-values
    tab = pd.read_table(new_assoc_file)
    tab["P"] = 10 ** tab["LOG10P"]

    # Create a Manhattan plot
    plt.figure()
    manhattanplot(
        data=tab,
        suggestiveline=None,  # type: ignore
        genomewideline=None,  # type: ignore
        marker=".",
        xticklabel_kws={"rotation": "vertical"},  # set vertical or any other degrees as you like.
    )
    plt.savefig(plot_file)
    # Close the figure so repeated calls don't accumulate open figures —
    # matplotlib keeps every figure alive until it is explicitly closed.
    plt.close()

113 

114 

def copy_results_to_cloud_storage(role: str, data_path: str, output_directory: str) -> None:
    """Upload the files in output_directory to gs://<data_path>/out/party<role>.

    For sfgwas runs, the cached Qpc.txt is first staged into output_directory
    so it is uploaded with the other results. Upload failures are printed,
    never raised (best-effort).
    """
    os.makedirs(output_directory, exist_ok=True)

    if "sfgwas" in output_directory:
        qpc_source = f"{constants.EXECUTABLES_PREFIX}sfgwas/cache/party{role}/Qpc.txt"
        shutil.copyfile(qpc_source, f"{output_directory}/Qpc.txt")

    try:
        client = storage.Client()
        # data_path is "<bucket>/<prefix...>"
        bucket_name, prefix = data_path.split("/", 1)
        bucket = client.bucket(bucket_name)

        for filename in os.listdir(output_directory):
            destination = bucket.blob(f"{prefix}/out/party{role}/{filename}")
            destination.upload_from_filename(f"{output_directory}/{filename}")

        print(f"Successfully uploaded results from {output_directory} to gs://{data_path}/out/party{role}")
    except Exception as e:
        print("Failed to upload results to cloud storage")
        print(e)

133 

134 

def copy_to_out_folder(relevant_paths: list) -> None:
    """
    Overwrite the contents of the out folder with the files/folders in relevant_paths
    """
    os.makedirs(constants.OUT_FOLDER, exist_ok=True)

    for source in relevant_paths:
        if not os.path.exists(source):
            continue

        target = f"{constants.OUT_FOLDER}/{os.path.basename(source)}"
        if os.path.isfile(source):
            shutil.copy2(source, target)
        elif os.path.isdir(source):
            # Remove any stale copy first so the directory is fully replaced,
            # not merged.
            if os.path.exists(target):
                shutil.rmtree(target)
            shutil.copytree(source, target)