Source code for feast.loaders.file

import shutil
import tempfile
import uuid
from datetime import datetime
from typing import Optional
from urllib.parse import urlparse

import pandas as pd
from google.cloud import storage
from pandavro import to_avro


def export_dataframe_to_staging_location(
    df: pd.DataFrame, staging_location_uri: str
) -> str:
    """
    Uploads a dataframe to a remote staging location

    Args:
        df: Pandas dataframe
        staging_location_uri: Remote staging location where the dataframe
            should be written.
            Examples: gs://bucket/path/, file:///data/subfolder/

    Returns:
        Full path to the file in the remote staging location
    """
    # Validate staging location
    uri = urlparse(staging_location_uri)
    if uri.scheme == "gs":
        dir_path, file_name, source_path = export_dataframe_to_local(df)
        upload_file_to_gcs(
            source_path, uri.hostname, str(uri.path).strip("/") + "/" + file_name
        )
        # Guard against deleting a suspiciously short (possibly top-level) path
        if len(str(dir_path)) < 5:
            raise Exception(f"Export location {dir_path} dangerous. Stopping.")
        shutil.rmtree(dir_path)
    elif uri.scheme == "file":
        dir_path, file_name, source_path = export_dataframe_to_local(df, uri.path)
    else:
        raise Exception(
            f"Staging location {staging_location_uri} does not have a valid URI."
            " Only gs:// and file:// are supported"
        )
    return staging_location_uri.rstrip("/") + "/" + file_name
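A minimal usage sketch for local staging; the directory /tmp/feast-staging is
hypothetical, and it must exist before the call since the Avro writer does not
create parent directories:

    import os

    os.makedirs("/tmp/feast-staging", exist_ok=True)
    df = pd.DataFrame({"entity_id": [1, 2], "feature_value": [0.5, 0.7]})
    staged_uri = export_dataframe_to_staging_location(df, "file:///tmp/feast-staging")
    # staged_uri ends with a generated file name such as
    # "21-06-2020_10-30-00_AM_1a2b3c4d.avro"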
def export_dataframe_to_local(df: pd.DataFrame, dir_path: Optional[str] = None):
    """
    Exports a pandas dataframe to the local filesystem

    Args:
        df: Pandas dataframe to save
        dir_path: (optional) Absolute directory path '/data/project/subfolder/'

    Returns:
        Tuple of the directory path, the generated Avro file name, and the
        full destination path of the exported file
    """
    # Create a local staging location if one is not provided
    if dir_path is None:
        dir_path = tempfile.mkdtemp()

    file_name = (
        f'{datetime.now().strftime("%d-%m-%Y_%I-%M-%S_%p")}'
        f"_{str(uuid.uuid4())[:8]}.avro"
    )
    dest_path = f"{dir_path}/{file_name}"

    # Export the dataset to an Avro file at the local path
    to_avro(df=df, file_path_or_buffer=dest_path)

    return dir_path, file_name, dest_path
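For reference, a short sketch of the returned tuple when no directory is
supplied (a fresh temporary directory is created):

    df = pd.DataFrame({"entity_id": [1], "feature_value": [0.5]})
    dir_path, file_name, dest_path = export_dataframe_to_local(df)
    # dir_path is a new tempfile.mkdtemp() directory and
    # dest_path == f"{dir_path}/{file_name}"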
def upload_file_to_gcs(local_path: str, bucket: str, remote_path: str):
    """
    Upload a file from the local file system to Google Cloud Storage (GCS)

    Args:
        local_path: Local filesystem path of file to upload
        bucket: GCS bucket to upload to
        remote_path: Path within GCS bucket to upload file to, includes file name
    """
    storage_client = storage.Client(project=None)
    bucket = storage_client.get_bucket(bucket)
    blob = bucket.blob(remote_path)
    blob.upload_from_filename(local_path)
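A hedged usage sketch; the bucket name and paths below are hypothetical, and
the call assumes application-default Google Cloud credentials with write
access to an existing bucket:

    upload_file_to_gcs(
        local_path="/tmp/data.avro",
        bucket="my-feast-bucket",
        remote_path="staging/data.avro",
    )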