mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
fix[csv_parser]: missing header
This commit is contained in:
@@ -104,9 +104,13 @@ class PandasCSVParser(BaseParser):
|
||||
raise ValueError("pandas module is required to read CSV files.")
|
||||
|
||||
df = pd.read_csv(file, **self._pandas_config)
|
||||
headers = df.columns.tolist()
|
||||
|
||||
text_list = df.apply(
|
||||
lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
|
||||
lambda row: self._col_joiner.join(
|
||||
[f"{headers[i]}: {str(val)}" for i, val in enumerate(row)]
|
||||
),
|
||||
axis=1,
|
||||
).tolist()
|
||||
|
||||
if self._concat_rows:
|
||||
@@ -169,12 +173,16 @@ class ExcelParser(BaseParser):
|
||||
raise ValueError("pandas module is required to read Excel files.")
|
||||
|
||||
df = pd.read_excel(file, **self._pandas_config)
|
||||
headers = df.columns.tolist()
|
||||
|
||||
text_list = df.apply(
|
||||
lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
|
||||
lambda row: self._col_joiner.join(
|
||||
[f"{headers[i]}: {str(val)}" for i, val in enumerate(row)]
|
||||
),
|
||||
axis=1,
|
||||
).tolist()
|
||||
|
||||
if self._concat_rows:
|
||||
return (self._row_joiner).join(text_list)
|
||||
else:
|
||||
return text_list
|
||||
return text_list
|
||||
|
||||
Reference in New Issue
Block a user