zaidmehdi committed on
Commit
402d8e8
1 Parent(s): adeabee

convert df to datasetdict

Browse files
Files changed (1) hide show
  1. src/utils.py +14 -0
src/utils.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import DatasetDict, Dataset
2
+ import pandas as pd
3
+
4
+
5
def convert_df_to_dataset_dict(df_train: pd.DataFrame, df_test: pd.DataFrame) -> DatasetDict:
    """Build a ``DatasetDict`` with ``train`` and ``test`` splits from two DataFrames.

    In each frame the columns ``#2_tweet`` and ``#3_country_label`` are renamed
    to ``tweet`` and ``label``; only those two columns are carried over into
    the resulting datasets. The input frames are not modified.

    Args:
        df_train: Rows for the ``train`` split.
        df_test: Rows for the ``test`` split.

    Returns:
        A ``DatasetDict`` with the keys ``"train"`` and ``"test"``.

    Raises:
        KeyError: If, after renaming, a frame has no ``tweet`` or ``label``
            column.
    """
    mapper = {"#2_tweet": "tweet", "#3_country_label": "label"}
    columns_to_keep = ["tweet", "label"]

    def _to_dataset(df: pd.DataFrame) -> Dataset:
        # Rename, then keep only the model-facing columns.
        selected = df.rename(columns=mapper)[columns_to_keep]
        # preserve_index=False: without it, a non-RangeIndex (e.g. after a
        # shuffle or a train/test split) is stored as an extra
        # ``__index_level_0__`` column, defeating the column selection above.
        return Dataset.from_pandas(selected, preserve_index=False)

    return DatasetDict({"train": _to_dataset(df_train), "test": _to_dataset(df_test)})