acgc.igra

Functions for reading and using IGRA v2 radiosonde data files

  1#!/usr/bin/env python3
  2# -*- coding: utf-8 -*-
  3'''Functions for reading and using IGRA v2 radiosonde data files
  4'''
  5
  6import datetime as dt
  7import numpy as np
  8import pandas as pd
  9
 10def read_igra_country(file):
 11    '''Read file of IGRA country codes
 12    
 13    Parameters
 14    ----------
 15    file : str
 16        name/path of file to read
 17        
 18    Returns
 19    -------
 20    pandas.DataFrame 
 21        Contains columns: 
 22        - ``countryID`` (str) : 2-letter code
 23        - ``countryName`` (str) : full name
 24    '''
 25
 26    country = pd.read_fwf( file, header=None, 
 27            widths=[2,41],
 28            names=['countryID','countryName'] )
 29
 30    return country
 31
 32def read_igra_stations(file):
 33    '''Read file of IGRA station coordinates and dates of observations
 34    
 35    Parameters
 36    ----------
 37    file : str
 38        name/path of file to read
 39        
 40    Returns
 41    -------
 42    pandas.DataFrame
 43        Contains columns:
 44        - siteID : IGRA station code
 45        - countryID : 2-letter country code
 46        - lat : latitude
 47        - lon : longitude
 48        - elev : elevation, m
 49        - state : if applicable
 50        - name : station name
 51        - firstyear : first year with observations
 52        - lastyear : final year with observations
 53        - nobs : number of sounding columns
 54    '''
 55
 56    stations = pd.read_fwf( file,
 57                            header=None,
 58                            na_values={'lat':-98.8888,'lon':-998.8888,'elev':[-999.9,-998.8]},
 59                            widths=[11,9,10,7,3,31,5,5,7], 
 60                            names=['siteID','lat','lon','elev',
 61                                   'state','name','firstyear','lastyear','nobs'] )
 62
 63    # Country code is the first two characters of the siteID
 64    stations['countryID'] = stations.siteID.str[0:2]
 65
 66    return stations
 67
 68def read_igra_file( file, derived=False, readprofiles=True ):
 69    '''Read file with IGRA soundings
 70
 71    This function can read IGRA files with and without derived variables.
 72    If only the derived variables are needed, reading the profiles can be skipped to improve speed.
 73    
 74    Parameters
 75    ----------
 76    file : str
 77        name/path of file to read
 78    derived : bool
 79        Set derived=True for files with derived variables and 
 80        derived=False (default) for files with only profile measurements
 81    readprofiles : bool
 82        Set readprofiles=False to skip reading the profiles. 
 83        Then, only header info and derived variables will be read
 84    
 85    Returns
 86    -------
 87    pandas.Dataframe 
 88        Containing columns
 89        - siteID  = ID code 
 90        - syntime = Nominal synoptic time (0Z or 12Z) of launch
 91        - time    = Actual launch time, if provided, otherwise same as synoptic time
 92        - numlev  = number of measurement levels in the profile
 93        - profile = sub-dataframe containing vertical profile (numlev rows). 
 94            See below for data columns. profile is only present when readprofiles=True
 95
 96        For "derived" files, the returned DataFrame also contains the following profile summary columns        
 97        - pw         = precipitable water, mm
 98        - invpress   = inversion pressure, hPa
 99        - invhgt     = inversion height, m AGL
100        - invtempdif = temperature difference from surface to inversion, K
101        - mixpress   = pressure at top of mixed layer (parcel method), hPa
102        - mixhgt     = height of mixed layer, m AGL
103        - frzpress   = pressure at freezing level, hPa
104        - frzhgt     = height of mixing level, m AGL
105        - lclpress   = pressure at the LCL, hPa
106        - lclhgt     = height of the LCL, m AGL
107        - lfcpress   = pressure of the LFC, hPa
108        - lfchgt     = height of the LFC, m AGL
109        - lnbpress   = pressure of the LNB, hPa
110        - lnbhgt     = height of LNB, m AGL
111        - LI         = Lifted index, C
112        - SI         = Showalter index, C
113        - KI         = K index, C
114        - TTI        = Total totals index, C
115        - CAPE       = CAPE, J/kg
116        - CIN        = Convective inhibition, J/kg
117
118        The "profile" field is a sub-DataFrame containing the following variables and "numlev" rows"
119        - p       = pressure, hPa 
120        - z       = altitude, m
121        - T       = temperature, C
122        - Td      = dewpoint, C
123        - RH      = relative humidity, %
124        - dpdp    = dewpoint depression, C
125        - wdir    = wind direction, 0-360 degrees
126        - wspd    = wind speed, m/s
127        - pflag   = pressure flag, see IGRA documentation
128        - zflag   = altitude flag, see IGRA documentation
129        - Tflag   = temperature flag, see IGRA documentation
130        For "derived" files, "profile" also contains the following variables
131        - zrep    = ?
132        - Tgrad   = ?
133        - Tpot    = potential temperature, K?
134        - Tpotgrad = ?
135        - Tvirt   = virtual temperature, K?
136        - Tvirtpot= virtual potential temperature, K?
137        - e       = water vapor pressure, hPa
138        - es      = saturation water vapor pressure, hPa
139        - RHrep   = ?
140        - RHgrad  = ?
141        - u       = eastward component of wind, m/s
142        - v       = northward component of wind, m/s
143        - ugrad   = ? m/s
144        - vgrad   = ? m/s
145        - N       = ?
146    '''
147
148    # Try to read the data from pickle; Read it from ascii file if pickle doesn't exist
149    try:
150        data = pd.read_pickle(file+'.pkl')
151    except FileNotFoundError:
152
153        # line number counter
154        lnum = 0
155
156        # profile number
157        pnum = 0
158
159        # Define empty data frame
160        data = pd.DataFrame(columns=('siteID','time','syntime','profile'))
161
162        first = True
163
164        basewidth = [12,5,3,3,3,5,5]
165        basenames = ['siteID','year','month','day','hour','reltime','numlev']
166
167        # Open file for line-by-line reading
168        with open( file, 'r', encoding='ascii' ) as f:
169            for line in f:
170
171                # Increment line counter
172                lnum += 1
173
174                # Raise error if line doesn't begin with "#"
175                if line[0] != '#':
176                    print('Unexpected IGRA file format. Header lines should begin with "#"')
177                    print('line ',lnum,' in file ',file)
178                    print(line)
179                    raise ValueError()
180
181                # Fields that are the same for sounding and derived files
182                siteID     =      line[1:12]
183                year       = int( line[13:17] )
184                month      = int( line[18:20] )
185                day        = int( line[21:23] )
186                hour       = int( line[24:26] )
187                release_time = int( line[27:31] )
188                numlev     = int( line[31:36] )
189
190                # Extract hour and minute from release time
191                release_hour    = int( release_time / 100 )
192                release_min     = np.mod( release_time, 100 )
193
194                # Use the nominal time when release time is missing
195                if release_hour==99:
196                    release_hour = hour
197                if release_min==99:
198                    release_min = 0
199
200                # Actual launch time
201                time = dt.datetime( year, month, day, release_hour, release_min )
202
203                # Synoptic time (Typically 0Z or 12Z)
204                syntime = dt.datetime( year, month, day, hour )
205
206                # Read variables that differ between derived and standard files
207                if derived:
208
209                    # Header items for derived files
210                    pw         = float( line[37:43] ) / 100
211                    invpress   = float( line[43:49] ) / 100
212                    invhgt     = float( line[49:55] )
213                    invtempdif = float( line[55:61] ) / 10
214                    mixpress   = float( line[61:67] ) / 100
215                    mixhgt     = float( line[67:73] )
216                    frzpress   = float( line[73:79] ) / 100
217                    frzhgt     = float( line[79:85] )
218                    lclpress   = float( line[85:91] ) / 100
219                    lclhgt     = float( line[91:97] )
220                    lfcpress   = float( line[97:103] ) / 100
221                    lfchgt     = float( line[103:109] )
222                    lnbpress   = float( line[109:115] ) / 100
223                    lnbhgt     = float( line[115:121] )
224                    LI         = float( line[121:127] )
225                    SI         = float( line[127:133] )
226                    KI         = float( line[133:139] )
227                    TTI        = float( line[139:145] )
228                    CAPE       = float( line[145:151] )
229                    CIN        = float( line[151:157] )
230
231                    # Profile metadata 
232                    info = { 'siteID':    siteID,
233                             'time':      time,
234                             'syntime':   syntime,
235                             'numlev':    numlev,
236                             'pw':        pw,
237                             'pInversion':invpress,
238                             'zInversion':invhgt,
239                             'dTinversion':invtempdif,
240                             'pMix':      mixpress,
241                             'zMix':      mixhgt,
242                             'pFreeze':   frzpress,
243                             'zFreeze':   frzhgt,
244                             'pLCL':      lclpress,
245                             'zLCL':      lclhgt,
246                             'pLFC':      lfcpress,
247                             'zLFC':      lfchgt,
248                             'pLNB':      lnbpress,
249                             'zLNB':      lnbhgt,
250                             'LI':      LI,
251                             'SI':      SI,
252                             'KI':      KI,
253                             'TTI':     TTI,
254                             'CAPE':    CAPE,
255                             'CIN':     CIN }
256
257                else:
258                    
259                    p_src  =        line[37:45]
260                    np_src =        line[46:54]
261                    lat    = float( line[55:62] ) / 1e4
262                    lon    = float( line[63:71] ) / 1e4
263
264                    # Profile metadata 
265                    info = { 'siteID':  siteID,
266                             'time':    time,
267                             'syntime': syntime,
268                             'numlev':  numlev }
269
270                # Replace missing data
271                for key in info.keys():
272                    if (info[key] in [-99999, -9999.9, -999.99]):
273                        info[key] = np.nan
274
275                # Print some info every 100 entries
276                if np.mod( pnum, 100 )==0:
277                    print('{:4d}-{:02d}-{:02d} {:02d}:{:02d}'.format(
278                        year, month, day, release_hour, release_min ))
279
280                # Read the vertical profile
281                if (readprofiles and derived):
282                    profile = pd.read_fwf( f, nrows=numlev,
283                                           header=None,
284                                           na_values=[-9999],
285                                           widths=[7]+[8]*18,
286                                           names=['p','zrep','z','T','Tgrad',
287                                                  'Tpot','Tpotgrad','Tvirt','Tvirtpot',
288                                                  'e','es',
289                                                  'RHrep','RH','RHgrad',
290                                                  'u','ugrad','v','vgrad','N'] )
291
292                    # Convert Pa -> hPa
293                    profile['p'] /= 100
294
295                    # Convert K*10 -> K
296                    profile[['T','Tgrad','Tpot','Tpotgrad','Tvirt','Tvirtpot']] /= 10 
297
298                    # Convert vapor pressure, hPa*1000 -> hPa
299                    profile[['e','es']] /= 1000
300
301                    # Convert %*10 -> %
302                    profile[['RH','RHrep']] /= 10
303
304                    # Convert m/s*10 -> m/s
305                    profile[['u','ugrad','v','vgrad']] /= 10
306
307                    # Add profile to data
308                    info.update({'profile': profile})
309
310                elif readprofiles:
311
312                    # Read the sounding
313                    # Units: p, Pa; z, m; T, C*10; RH, %*10; dpdp, C*10 (dewpoint depression);
314                    # wdir, degree; wspd, m/s*10
315                    profile = pd.read_fwf(f, nrows=numlev,
316                                          header=None,
317                                          na_values=[-8888,-9999], 
318                                          widths=[1,1,6,7,1,5,1,5,1,5,6,6,6],
319                                          names=['levtype1','levtype2','etime',
320                                                 'p','pflag','z','zflag','T','Tflag',
321                                                 'RH','dpdp','wdir','wspd'] )
322
323                    # Keep level types 1* (standard pressure), 2* (other pressure level)
324                    # Drop level type 3* (non-pressure levels)
325                    profile = profile[ profile.levtype1 != 3 ]
326
327                    # Convert Pa -> hPa
328                    profile['p'] /= 100
329
330                    # Convert C*10 -> C
331                    profile['T']     = profile['T'] / 10
332                    profile['dpdp'] /= 10
333
334                    # Convert %*10 -> %
335                    profile['RH'] /= 10
336
337                    # Convert m/s*10 -> m/s
338                    profile['wspd'] /= 10
339
340                    # Dewpoint, C
341                    profile['Td'] = profile['T'] - profile['dpdp']
342
343                    # Add profile to data
344                    info.update({'profile': profile})
345
346                else:
347                    # Don't read the profile
348                    # Skip the lines containing the profile
349                    for i in range(numlev):
350                        next(f)
351
352                # Increment line counter
353                lnum += numlev
354
355                # Increment profile number
356                pnum += 1
357
358                # Create an empty dataframe on first pass
359                if first:
360                    data = pd.DataFrame(columns=info.keys())
361                    first= False
362
363                # Add this datapoint; Use nominal time for the index
364                data.loc[syntime] = info
365
366        # Save data as pickle file
367        data.to_pickle(file+'.pkl')
368
369    return data
370
def demo():
    '''Read some sample data and plot it

    Reads ``data/Barrow_2000.txt`` with read_igra_file, selects the sounding whose
    index time is nearest 2000-06-01 00:00, and plots temperature and dewpoint
    against altitude for the lowest 4000 m using matplotlib.
    '''

    import matplotlib.pyplot as plt

    # Read the Barrow data
    data = read_igra_file( 'data/Barrow_2000.txt' )

    # Find the sounding closest to 2000-06-01 00:00 (index holds nominal sounding times)
    idx = np.argmin( np.abs( data.index - dt.datetime(2000,6,1,0) ) )

    profile = data.iloc[idx].profile

    plt.clf()
    plt.plot( profile['T'], profile['z'], label='T' )
    plt.plot( profile['Td'], profile['z'], label='Td' )
    plt.title( data.index[idx] )
    # read_igra_file converts standard-file temperatures to C, not K
    plt.xlabel( 'Temperature, C' )
    plt.ylabel( 'Altitude, m' )
    plt.ylim((0,4000))
    plt.legend()
def read_igra_country(file):
def read_igra_country(file):
    '''Read file of IGRA country codes

    The file is parsed as fixed-width text: 2 characters of country code
    followed by a 41-character field holding the country name.
    Only blank fields are treated as missing, so codes or names that happen to
    match one of pandas' default missing-value strings (e.g. "NA") are kept as text.

    Parameters
    ----------
    file : str
        name/path of file to read

    Returns
    -------
    pandas.DataFrame
        Contains columns:
        - ``countryID`` (str) : 2-letter code
        - ``countryName`` (str) : full name
    '''

    # keep_default_na=False stops pandas from converting text such as "NA" to NaN;
    # na_values=[''] keeps the usual behavior that an empty field is missing
    country = pd.read_fwf( file, header=None,
            widths=[2,41],
            names=['countryID','countryName'],
            keep_default_na=False,
            na_values=[''] )

    return country

Read file of IGRA country codes

Parameters
  • file (str): name/path of file to read
Returns
  • pandas.DataFrame: Contains columns:
    • countryID (str) : 2-letter code
    • countryName (str) : full name
def read_igra_stations(file):
def read_igra_stations(file):
    '''Read file of IGRA station coordinates and dates of observations

    The file is parsed as fixed-width text. Blank fields are missing (NaN) in every column.
    In addition, the values -98.8888 (lat), -998.8888 (lon), and -999.9 or -998.8 (elev)
    are converted to NaN. Other text (e.g. a name or state code spelled "NA") is kept as is.

    Parameters
    ----------
    file : str
        name/path of file to read

    Returns
    -------
    pandas.DataFrame
        Contains columns, in this order:
        - siteID : IGRA station code
        - lat : latitude
        - lon : longitude
        - elev : elevation, m
        - state : if applicable
        - name : station name
        - firstyear : first year with observations
        - lastyear : final year with observations
        - nobs : number of soundings
        - countryID : 2-letter country code (first two characters of siteID)
    '''

    names = ['siteID','lat','lon','elev',
             'state','name','firstyear','lastyear','nobs']

    # Missing-value markers are listed explicitly per column instead of relying on
    # pandas' default list, which would also turn text like "NA" into NaN
    na_values = { column: [''] for column in names }
    na_values['lat'].append( -98.8888 )
    na_values['lon'].append( -998.8888 )
    na_values['elev'].extend( [-999.9,-998.8] )

    stations = pd.read_fwf( file,
                            header=None,
                            keep_default_na=False,
                            na_values=na_values,
                            widths=[11,9,10,7,3,31,5,5,7],
                            names=names )

    # Country code is the first two characters of the siteID
    stations['countryID'] = stations.siteID.str[0:2]

    return stations

Read file of IGRA station coordinates and dates of observations

Parameters
  • file (str): name/path of file to read
Returns
  • pandas.DataFrame: Contains columns:
    • siteID : IGRA station code
    • countryID : 2-letter country code
    • lat : latitude
    • lon : longitude
    • elev : elevation, m
    • state : if applicable
    • name : station name
    • firstyear : first year with observations
    • lastyear : final year with observations
    • nobs : number of soundings
def read_igra_file(file, derived=False, readprofiles=True):
 69def read_igra_file( file, derived=False, readprofiles=True ):
 70    '''Read file with IGRA soundings
 71
 72    This function can read IGRA files with and without derived variables.
 73    If only the derived variables are needed, reading the profiles can be skipped to improve speed.
 74    
 75    Parameters
 76    ----------
 77    file : str
 78        name/path of file to read
 79    derived : bool
 80        Set derived=True for files with derived variables and 
 81        derived=False (default) for files with only profile measurements
 82    readprofiles : bool
 83        Set readprofiles=False to skip reading the profiles. 
 84        Then, only header info and derived variables will be read
 85    
 86    Returns
 87    -------
 88    pandas.Dataframe 
 89        Containing columns
 90        - siteID  = ID code 
 91        - syntime = Nominal synoptic time (0Z or 12Z) of launch
 92        - time    = Actual launch time, if provided, otherwise same as synoptic time
 93        - numlev  = number of measurement levels in the profile
 94        - profile = sub-dataframe containing vertical profile (numlev rows). 
 95            See below for data columns. profile is only present when readprofiles=True
 96
 97        For "derived" files, the returned DataFrame also contains the following profile summary columns        
 98        - pw         = precipitable water, mm
 99        - invpress   = inversion pressure, hPa
100        - invhgt     = inversion height, m AGL
101        - invtempdif = temperature difference from surface to inversion, K
102        - mixpress   = pressure at top of mixed layer (parcel method), hPa
103        - mixhgt     = height of mixed layer, m AGL
104        - frzpress   = pressure at freezing level, hPa
105        - frzhgt     = height of mixing level, m AGL
106        - lclpress   = pressure at the LCL, hPa
107        - lclhgt     = height of the LCL, m AGL
108        - lfcpress   = pressure of the LFC, hPa
109        - lfchgt     = height of the LFC, m AGL
110        - lnbpress   = pressure of the LNB, hPa
111        - lnbhgt     = height of LNB, m AGL
112        - LI         = Lifted index, C
113        - SI         = Showalter index, C
114        - KI         = K index, C
115        - TTI        = Total totals index, C
116        - CAPE       = CAPE, J/kg
117        - CIN        = Convective inhibition, J/kg
118
119        The "profile" field is a sub-DataFrame containing the following variables and "numlev" rows"
120        - p       = pressure, hPa 
121        - z       = altitude, m
122        - T       = temperature, C
123        - Td      = dewpoint, C
124        - RH      = relative humidity, %
125        - dpdp    = dewpoint depression, C
126        - wdir    = wind direction, 0-360 degrees
127        - wspd    = wind speed, m/s
128        - pflag   = pressure flag, see IGRA documentation
129        - zflag   = altitude flag, see IGRA documentation
130        - Tflag   = temperature flag, see IGRA documentation
131        For "derived" files, "profile" also contains the following variables
132        - zrep    = ?
133        - Tgrad   = ?
134        - Tpot    = potential temperature, K?
135        - Tpotgrad = ?
136        - Tvirt   = virtual temperature, K?
137        - Tvirtpot= virtual potential temperature, K?
138        - e       = water vapor pressure, hPa
139        - es      = saturation water vapor pressure, hPa
140        - RHrep   = ?
141        - RHgrad  = ?
142        - u       = eastward component of wind, m/s
143        - v       = northward component of wind, m/s
144        - ugrad   = ? m/s
145        - vgrad   = ? m/s
146        - N       = ?
147    '''
148
149    # Try to read the data from pickle; Read it from ascii file if pickle doesn't exist
150    try:
151        data = pd.read_pickle(file+'.pkl')
152    except FileNotFoundError:
153
154        # line number counter
155        lnum = 0
156
157        # profile number
158        pnum = 0
159
160        # Define empty data frame
161        data = pd.DataFrame(columns=('siteID','time','syntime','profile'))
162
163        first = True
164
165        basewidth = [12,5,3,3,3,5,5]
166        basenames = ['siteID','year','month','day','hour','reltime','numlev']
167
168        # Open file for line-by-line reading
169        with open( file, 'r', encoding='ascii' ) as f:
170            for line in f:
171
172                # Increment line counter
173                lnum += 1
174
175                # Raise error if line doesn't begin with "#"
176                if line[0] != '#':
177                    print('Unexpected IGRA file format. Header lines should begin with "#"')
178                    print('line ',lnum,' in file ',file)
179                    print(line)
180                    raise ValueError()
181
182                # Fields that are the same for sounding and derived files
183                siteID     =      line[1:12]
184                year       = int( line[13:17] )
185                month      = int( line[18:20] )
186                day        = int( line[21:23] )
187                hour       = int( line[24:26] )
188                release_time = int( line[27:31] )
189                numlev     = int( line[31:36] )
190
191                # Extract hour and minute from release time
192                release_hour    = int( release_time / 100 )
193                release_min     = np.mod( release_time, 100 )
194
195                # Use the nominal time when release time is missing
196                if release_hour==99:
197                    release_hour = hour
198                if release_min==99:
199                    release_min = 0
200
201                # Actual launch time
202                time = dt.datetime( year, month, day, release_hour, release_min )
203
204                # Synoptic time (Typically 0Z or 12Z)
205                syntime = dt.datetime( year, month, day, hour )
206
207                # Read variables that differ between derived and standard files
208                if derived:
209
210                    # Header items for derived files
211                    pw         = float( line[37:43] ) / 100
212                    invpress   = float( line[43:49] ) / 100
213                    invhgt     = float( line[49:55] )
214                    invtempdif = float( line[55:61] ) / 10
215                    mixpress   = float( line[61:67] ) / 100
216                    mixhgt     = float( line[67:73] )
217                    frzpress   = float( line[73:79] ) / 100
218                    frzhgt     = float( line[79:85] )
219                    lclpress   = float( line[85:91] ) / 100
220                    lclhgt     = float( line[91:97] )
221                    lfcpress   = float( line[97:103] ) / 100
222                    lfchgt     = float( line[103:109] )
223                    lnbpress   = float( line[109:115] ) / 100
224                    lnbhgt     = float( line[115:121] )
225                    LI         = float( line[121:127] )
226                    SI         = float( line[127:133] )
227                    KI         = float( line[133:139] )
228                    TTI        = float( line[139:145] )
229                    CAPE       = float( line[145:151] )
230                    CIN        = float( line[151:157] )
231
232                    # Profile metadata 
233                    info = { 'siteID':    siteID,
234                             'time':      time,
235                             'syntime':   syntime,
236                             'numlev':    numlev,
237                             'pw':        pw,
238                             'pInversion':invpress,
239                             'zInversion':invhgt,
240                             'dTinversion':invtempdif,
241                             'pMix':      mixpress,
242                             'zMix':      mixhgt,
243                             'pFreeze':   frzpress,
244                             'zFreeze':   frzhgt,
245                             'pLCL':      lclpress,
246                             'zLCL':      lclhgt,
247                             'pLFC':      lfcpress,
248                             'zLFC':      lfchgt,
249                             'pLNB':      lnbpress,
250                             'zLNB':      lnbhgt,
251                             'LI':      LI,
252                             'SI':      SI,
253                             'KI':      KI,
254                             'TTI':     TTI,
255                             'CAPE':    CAPE,
256                             'CIN':     CIN }
257
258                else:
259                    
260                    p_src  =        line[37:45]
261                    np_src =        line[46:54]
262                    lat    = float( line[55:62] ) / 1e4
263                    lon    = float( line[63:71] ) / 1e4
264
265                    # Profile metadata 
266                    info = { 'siteID':  siteID,
267                             'time':    time,
268                             'syntime': syntime,
269                             'numlev':  numlev }
270
271                # Replace missing data
272                for key in info.keys():
273                    if (info[key] in [-99999, -9999.9, -999.99]):
274                        info[key] = np.nan
275
276                # Print some info every 100 entries
277                if np.mod( pnum, 100 )==0:
278                    print('{:4d}-{:02d}-{:02d} {:02d}:{:02d}'.format(
279                        year, month, day, release_hour, release_min ))
280
281                # Read the vertical profile
282                if (readprofiles and derived):
283                    profile = pd.read_fwf( f, nrows=numlev,
284                                           header=None,
285                                           na_values=[-9999],
286                                           widths=[7]+[8]*18,
287                                           names=['p','zrep','z','T','Tgrad',
288                                                  'Tpot','Tpotgrad','Tvirt','Tvirtpot',
289                                                  'e','es',
290                                                  'RHrep','RH','RHgrad',
291                                                  'u','ugrad','v','vgrad','N'] )
292
293                    # Convert Pa -> hPa
294                    profile['p'] /= 100
295
296                    # Convert K*10 -> K
297                    profile[['T','Tgrad','Tpot','Tpotgrad','Tvirt','Tvirtpot']] /= 10 
298
299                    # Convert vapor pressure, hPa*1000 -> hPa
300                    profile[['e','es']] /= 1000
301
302                    # Convert %*10 -> %
303                    profile[['RH','RHrep']] /= 10
304
305                    # Convert m/s*10 -> m/s
306                    profile[['u','ugrad','v','vgrad']] /= 10
307
308                    # Add profile to data
309                    info.update({'profile': profile})
310
311                elif readprofiles:
312
313                    # Read the sounding
314                    # Units: p, Pa; z, m; T, C*10; RH, %*10; dpdp, C*10 (dewpoint depression);
315                    # wdir, degree; wspd, m/s*10
316                    profile = pd.read_fwf(f, nrows=numlev,
317                                          header=None,
318                                          na_values=[-8888,-9999], 
319                                          widths=[1,1,6,7,1,5,1,5,1,5,6,6,6],
320                                          names=['levtype1','levtype2','etime',
321                                                 'p','pflag','z','zflag','T','Tflag',
322                                                 'RH','dpdp','wdir','wspd'] )
323
324                    # Keep level types 1* (standard pressure), 2* (other pressure level)
325                    # Drop level type 3* (non-pressure levels)
326                    profile = profile[ profile.levtype1 != 3 ]
327
328                    # Convert Pa -> hPa
329                    profile['p'] /= 100
330
331                    # Convert C*10 -> C
332                    profile['T']     = profile['T'] / 10
333                    profile['dpdp'] /= 10
334
335                    # Convert %*10 -> %
336                    profile['RH'] /= 10
337
338                    # Convert m/s*10 -> m/s
339                    profile['wspd'] /= 10
340
341                    # Dewpoint, C
342                    profile['Td'] = profile['T'] - profile['dpdp']
343
344                    # Add profile to data
345                    info.update({'profile': profile})
346
347                else:
348                    # Don't read the profile
349                    # Skip the lines containing the profile
350                    for i in range(numlev):
351                        next(f)
352
353                # Increment line counter
354                lnum += numlev
355
356                # Increment profile number
357                pnum += 1
358
359                # Create an empty dataframe on first pass
360                if first:
361                    data = pd.DataFrame(columns=info.keys())
362                    first= False
363
364                # Add this datapoint; Use nominal time for the index
365                data.loc[syntime] = info
366
367        # Save data as pickle file
368        data.to_pickle(file+'.pkl')
369
370    return data

Read file with IGRA soundings

This function can read IGRA files with and without derived variables. If only the derived variables are needed, reading the profiles can be skipped to improve speed.

Parameters
  • file (str): name/path of file to read
  • derived (bool): Set derived=True for files with derived variables and derived=False (default) for files with only profile measurements
  • readprofiles (bool): Set readprofiles=False to skip reading the profiles. Then, only header info and derived variables will be read
Returns
  • pandas.DataFrame: Containing columns
    • siteID = ID code
    • syntime = Nominal synoptic time (0Z or 12Z) of launch
    • time = Actual launch time, if provided, otherwise same as synoptic time
    • numlev = number of measurement levels in the profile
    • profile = sub-dataframe containing vertical profile (numlev rows). See below for data columns. profile is only present when readprofiles=True

For "derived" files, the returned DataFrame also contains the following profile summary columns

  • pw = precipitable water, mm
  • invpress = inversion pressure, hPa
  • invhgt = inversion height, m AGL
  • invtempdif = temperature difference from surface to inversion, K
  • mixpress = pressure at top of mixed layer (parcel method), hPa
  • mixhgt = height of mixed layer, m AGL
  • frzpress = pressure at freezing level, hPa
  • frzhgt = height of freezing level, m AGL
  • lclpress = pressure at the LCL, hPa
  • lclhgt = height of the LCL, m AGL
  • lfcpress = pressure of the LFC, hPa
  • lfchgt = height of the LFC, m AGL
  • lnbpress = pressure of the LNB, hPa
  • lnbhgt = height of LNB, m AGL
  • LI = Lifted index, C
  • SI = Showalter index, C
  • KI = K index, C
  • TTI = Total totals index, C
  • CAPE = CAPE, J/kg
  • CIN = Convective inhibition, J/kg

The "profile" field is a sub-DataFrame containing the following variables and "numlev" rows

  • p = pressure, hPa
  • z = altitude, m
  • T = temperature, C
  • Td = dewpoint, C
  • RH = relative humidity, %
  • dpdp = dewpoint depression, C
  • wdir = wind direction, 0-360 degrees
  • wspd = wind speed, m/s
  • pflag = pressure flag, see IGRA documentation
  • zflag = altitude flag, see IGRA documentation
  • Tflag = temperature flag, see IGRA documentation

For "derived" files, "profile" also contains the following variables

  • zrep = ?
  • Tgrad = ?
  • Tpot = potential temperature, K?
  • Tpotgrad = ?
  • Tvirt = virtual temperature, K?
  • Tvirtpot= virtual potential temperature, K?
  • e = water vapor pressure, hPa
  • es = saturation water vapor pressure, hPa
  • RHrep = ?
  • RHgrad = ?
  • u = eastward component of wind, m/s
  • v = northward component of wind, m/s
  • ugrad = ? m/s
  • vgrad = ? m/s
  • N = ?
def demo():
def demo():
    '''Read some sample data and plot it

    Reads ``data/Barrow_2000.txt`` with read_igra_file, selects the sounding whose
    index time is nearest 2000-06-01 00:00, and plots temperature and dewpoint
    against altitude for the lowest 4000 m using matplotlib.
    '''

    import matplotlib.pyplot as plt

    # Read the Barrow data
    data = read_igra_file( 'data/Barrow_2000.txt' )

    # Find the sounding closest to 2000-06-01 00:00 (index holds nominal sounding times)
    idx = np.argmin( np.abs( data.index - dt.datetime(2000,6,1,0) ) )

    profile = data.iloc[idx].profile

    plt.clf()
    plt.plot( profile['T'], profile['z'], label='T' )
    plt.plot( profile['Td'], profile['z'], label='Td' )
    plt.title( data.index[idx] )
    # read_igra_file converts standard-file temperatures to C, not K
    plt.xlabel( 'Temperature, C' )
    plt.ylabel( 'Altitude, m' )
    plt.ylim((0,4000))
    plt.legend()

Read some sample data and plot it