ℹ️ These docs are for current Galileo customers. Docs for the free version of Galileo can be found here.
Integrate Galileo with Delta Lake on Databricks to manage large-scale data, ensuring seamless collaboration and enhanced NLP workflows.
import os

import pandas as pd
from deltalake import DeltaTable, write_deltalake

# NOTE(review): `dq`, `newsgroups_train` and `newsgroups_test` are not defined
# in this snippet; they are assumed to come from an earlier setup step
# (presumably the Galileo `dataquality` client and a 20-newsgroups dataset
# load) -- confirm against the surrounding docs.

# Each split gets its own Delta table. Writing both splits to one path would
# make the second write collide with the first, and reading that single path
# back twice would yield the same rows for "train" and "test".
TRAIN_TABLE_PATH = os.path.join("tmp", "delta_lake_path", "train")
TEST_TABLE_PATH = os.path.join("tmp", "delta_lake_path", "test")

# Dataframes with 2 columns: text and label
df_train = pd.DataFrame(
    {"text": newsgroups_train.data, "label": newsgroups_train.target}
)
df_test = pd.DataFrame(
    {"text": newsgroups_test.data, "label": newsgroups_test.target}
)

# mode="overwrite" keeps the example re-runnable: the default mode ("error")
# refuses to write when a table already exists at the target path.
write_deltalake(TRAIN_TABLE_PATH, df_train, mode="overwrite")
write_deltalake(TEST_TABLE_PATH, df_test, mode="overwrite")

# Read each split back from its own Delta table.
df_train_from_deltalake = DeltaTable(TRAIN_TABLE_PATH).to_pandas()
df_test_from_deltalake = DeltaTable(TEST_TABLE_PATH).to_pandas()

# Train on the training split and evaluate on the test split.
dq.auto(
    train_data=df_train_from_deltalake,
    test_data=df_test_from_deltalake,
    labels=newsgroups_train.target_names,
    project_name="my_newsgroups_project",
    run_name="run_1",
)
Was this page helpful?