Skip to content

Commit d6aa3e1

Browse files
committed
fix.
1 parent b2de41c commit d6aa3e1

7 files changed

Lines changed: 75 additions & 74 deletions

File tree

python/lower_case_name.tsfile

22.5 KB
Binary file not shown.
8.85 KB
Binary file not shown.

python/test1.tsfile

22.5 KB
Binary file not shown.

python/tests/test_to_tsfile.py

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from pandas.core.dtypes.common import is_integer_dtype
2424

2525
from tsfile import to_dataframe, ColumnCategory
26-
from tsfile.utils import to_tsfile
26+
from tsfile.utils import dataframe_to_tsfile
2727

2828

2929
def convert_to_nullable_types(df):
@@ -48,9 +48,9 @@ def convert_to_nullable_types(df):
4848
return df
4949

5050

51-
def test_to_tsfile_basic():
52-
"""Test basic to_tsfile functionality with time column."""
53-
tsfile_path = "test_to_tsfile_basic.tsfile"
51+
def test_dataframe_to_tsfile_basic():
52+
"""Test basic dataframe_to_tsfile functionality with time column."""
53+
tsfile_path = "test_dataframe_to_tsfile_basic.tsfile"
5454
try:
5555
if os.path.exists(tsfile_path):
5656
os.remove(tsfile_path)
@@ -63,7 +63,7 @@ def test_to_tsfile_basic():
6363
'value2': [i * 10 for i in range(100)]
6464
})
6565

66-
to_tsfile(df, tsfile_path, table_name="test_table")
66+
dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
6767

6868
# Verify by reading back
6969
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -80,9 +80,9 @@ def test_to_tsfile_basic():
8080
os.remove(tsfile_path)
8181

8282

83-
def test_to_tsfile_with_index():
84-
"""Test to_tsfile using DataFrame index as time when no 'time' column exists."""
85-
tsfile_path = "test_to_tsfile_index.tsfile"
83+
def test_dataframe_to_tsfile_with_index():
84+
"""Test dataframe_to_tsfile using DataFrame index as time when no 'time' column exists."""
85+
tsfile_path = "test_dataframe_to_tsfile_index.tsfile"
8686
try:
8787
if os.path.exists(tsfile_path):
8888
os.remove(tsfile_path)
@@ -94,7 +94,7 @@ def test_to_tsfile_with_index():
9494
})
9595
df.index = [i * 10 for i in range(50)] # Set index as timestamps
9696

97-
to_tsfile(df, tsfile_path, table_name="test_table")
97+
dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
9898

9999
# Verify by reading back
100100
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -112,9 +112,9 @@ def test_to_tsfile_with_index():
112112
os.remove(tsfile_path)
113113

114114

115-
def test_to_tsfile_custom_time_column():
116-
"""Test to_tsfile with custom time column name."""
117-
tsfile_path = "test_to_tsfile_custom_time.tsfile"
115+
def test_dataframe_to_tsfile_custom_time_column():
116+
"""Test dataframe_to_tsfile with custom time column name."""
117+
tsfile_path = "test_dataframe_to_tsfile_custom_time.tsfile"
118118
try:
119119
if os.path.exists(tsfile_path):
120120
os.remove(tsfile_path)
@@ -125,7 +125,7 @@ def test_to_tsfile_custom_time_column():
125125
'value': [i * 3.0 for i in range(30)]
126126
})
127127

128-
to_tsfile(df, tsfile_path, table_name="test_table", time_column="timestamp")
128+
dataframe_to_tsfile(df, tsfile_path, table_name="test_table", time_column="timestamp")
129129

130130
# Verify by reading back
131131
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -141,9 +141,9 @@ def test_to_tsfile_custom_time_column():
141141
os.remove(tsfile_path)
142142

143143

144-
def test_to_tsfile_with_tag_columns():
145-
"""Test to_tsfile with tag columns specified."""
146-
tsfile_path = "test_to_tsfile_tags.tsfile"
144+
def test_dataframe_to_tsfile_with_tag_columns():
145+
"""Test dataframe_to_tsfile with tag columns specified."""
146+
tsfile_path = "test_dataframe_to_tsfile_tags.tsfile"
147147
try:
148148
if os.path.exists(tsfile_path):
149149
os.remove(tsfile_path)
@@ -155,7 +155,7 @@ def test_to_tsfile_with_tag_columns():
155155
'value': [i * 1.5 for i in range(20)]
156156
})
157157

158-
to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device", "location"])
158+
dataframe_to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device", "location"])
159159

160160
# Verify by reading back
161161
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -171,9 +171,9 @@ def test_to_tsfile_with_tag_columns():
171171
os.remove(tsfile_path)
172172

173173

174-
def test_to_tsfile_all_datatypes():
175-
"""Test to_tsfile with all supported data types."""
176-
tsfile_path = "test_to_tsfile_all_types.tsfile"
174+
def test_dataframe_to_tsfile_all_datatypes():
175+
"""Test dataframe_to_tsfile with all supported data types."""
176+
tsfile_path = "test_dataframe_to_tsfile_all_types.tsfile"
177177
try:
178178
if os.path.exists(tsfile_path):
179179
os.remove(tsfile_path)
@@ -189,7 +189,7 @@ def test_to_tsfile_all_datatypes():
189189
'blob_col': [f"blob{i}".encode('utf-8') for i in range(50)]
190190
})
191191

192-
to_tsfile(df, tsfile_path, table_name="test_table")
192+
dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
193193

194194
# Verify by reading back
195195
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -211,9 +211,9 @@ def test_to_tsfile_all_datatypes():
211211
os.remove(tsfile_path)
212212

213213

214-
def test_to_tsfile_default_table_name():
215-
"""Test to_tsfile with default table name."""
216-
tsfile_path = "test_to_tsfile_default_name.tsfile"
214+
def test_dataframe_to_tsfile_default_table_name():
215+
"""Test dataframe_to_tsfile with default table name."""
216+
tsfile_path = "test_dataframe_to_tsfile_default_name.tsfile"
217217
try:
218218
if os.path.exists(tsfile_path):
219219
os.remove(tsfile_path)
@@ -223,7 +223,7 @@ def test_to_tsfile_default_table_name():
223223
'value': [i * 1.0 for i in range(10)]
224224
})
225225

226-
to_tsfile(df, tsfile_path) # No table_name specified
226+
dataframe_to_tsfile(df, tsfile_path) # No table_name specified
227227

228228
# Verify by reading back with default table name
229229
df_read = to_dataframe(tsfile_path, table_name="table")
@@ -233,9 +233,9 @@ def test_to_tsfile_default_table_name():
233233
os.remove(tsfile_path)
234234

235235

236-
def test_to_tsfile_case_insensitive_time():
237-
"""Test to_tsfile with case-insensitive time column."""
238-
tsfile_path = "test_to_tsfile_case_time.tsfile"
236+
def test_dataframe_to_tsfile_case_insensitive_time():
237+
"""Test dataframe_to_tsfile with case-insensitive time column."""
238+
tsfile_path = "test_dataframe_to_tsfile_case_time.tsfile"
239239
try:
240240
if os.path.exists(tsfile_path):
241241
os.remove(tsfile_path)
@@ -245,7 +245,7 @@ def test_to_tsfile_case_insensitive_time():
245245
'value': [i * 2.0 for i in range(20)]
246246
})
247247

248-
to_tsfile(df, tsfile_path, table_name="test_table")
248+
dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
249249

250250
# Verify by reading back
251251
df_read = to_dataframe(tsfile_path, table_name="test_table")
@@ -256,25 +256,25 @@ def test_to_tsfile_case_insensitive_time():
256256
os.remove(tsfile_path)
257257

258258

259-
def test_to_tsfile_empty_dataframe():
260-
"""Test to_tsfile raises error for empty DataFrame."""
261-
tsfile_path = "test_to_tsfile_empty.tsfile"
259+
def test_dataframe_to_tsfile_empty_dataframe():
260+
"""Test dataframe_to_tsfile raises error for empty DataFrame."""
261+
tsfile_path = "test_dataframe_to_tsfile_empty.tsfile"
262262
try:
263263
if os.path.exists(tsfile_path):
264264
os.remove(tsfile_path)
265265

266266
df = pd.DataFrame()
267267

268268
with pytest.raises(ValueError, match="DataFrame cannot be None or empty"):
269-
to_tsfile(df, tsfile_path)
269+
dataframe_to_tsfile(df, tsfile_path)
270270
finally:
271271
if os.path.exists(tsfile_path):
272272
os.remove(tsfile_path)
273273

274274

275-
def test_to_tsfile_no_data_columns():
276-
"""Test to_tsfile raises error when only time column exists."""
277-
tsfile_path = "test_to_tsfile_no_data.tsfile"
275+
def test_dataframe_to_tsfile_no_data_columns():
276+
"""Test dataframe_to_tsfile raises error when only time column exists."""
277+
tsfile_path = "test_dataframe_to_tsfile_no_data.tsfile"
278278
try:
279279
if os.path.exists(tsfile_path):
280280
os.remove(tsfile_path)
@@ -284,15 +284,15 @@ def test_to_tsfile_no_data_columns():
284284
})
285285

286286
with pytest.raises(ValueError, match="DataFrame must have at least one data column"):
287-
to_tsfile(df, tsfile_path)
287+
dataframe_to_tsfile(df, tsfile_path)
288288
finally:
289289
if os.path.exists(tsfile_path):
290290
os.remove(tsfile_path)
291291

292292

293-
def test_to_tsfile_invalid_time_column():
294-
"""Test to_tsfile raises error for invalid time column."""
295-
tsfile_path = "test_to_tsfile_invalid_time.tsfile"
293+
def test_dataframe_to_tsfile_invalid_time_column():
294+
"""Test dataframe_to_tsfile raises error for invalid time column."""
295+
tsfile_path = "test_dataframe_to_tsfile_invalid_time.tsfile"
296296
try:
297297
if os.path.exists(tsfile_path):
298298
os.remove(tsfile_path)
@@ -304,15 +304,15 @@ def test_to_tsfile_invalid_time_column():
304304

305305
# Time column doesn't exist
306306
with pytest.raises(ValueError, match="Time column 'time' not found"):
307-
to_tsfile(df, tsfile_path, time_column="time")
307+
dataframe_to_tsfile(df, tsfile_path, time_column="time")
308308
finally:
309309
if os.path.exists(tsfile_path):
310310
os.remove(tsfile_path)
311311

312312

313-
def test_to_tsfile_non_integer_time_column():
314-
"""Test to_tsfile raises error for non-integer time column."""
315-
tsfile_path = "test_to_tsfile_non_int_time.tsfile"
313+
def test_dataframe_to_tsfile_non_integer_time_column():
314+
"""Test dataframe_to_tsfile raises error for non-integer time column."""
315+
tsfile_path = "test_dataframe_to_tsfile_non_int_time.tsfile"
316316
try:
317317
if os.path.exists(tsfile_path):
318318
os.remove(tsfile_path)
@@ -323,15 +323,15 @@ def test_to_tsfile_non_integer_time_column():
323323
})
324324

325325
with pytest.raises(TypeError, match="must be integer type"):
326-
to_tsfile(df, tsfile_path)
326+
dataframe_to_tsfile(df, tsfile_path)
327327
finally:
328328
if os.path.exists(tsfile_path):
329329
os.remove(tsfile_path)
330330

331331

332-
def test_to_tsfile_invalid_tag_column():
333-
"""Test to_tsfile raises error for invalid tag column."""
334-
tsfile_path = "test_to_tsfile_invalid_tag.tsfile"
332+
def test_dataframe_to_tsfile_invalid_tag_column():
333+
"""Test dataframe_to_tsfile raises error for invalid tag column."""
334+
tsfile_path = "test_dataframe_to_tsfile_invalid_tag.tsfile"
335335
try:
336336
if os.path.exists(tsfile_path):
337337
os.remove(tsfile_path)
@@ -342,15 +342,15 @@ def test_to_tsfile_invalid_tag_column():
342342
})
343343

344344
with pytest.raises(ValueError, match="Tag column 'invalid' not found"):
345-
to_tsfile(df, tsfile_path, tag_column=["invalid"])
345+
dataframe_to_tsfile(df, tsfile_path, tag_column=["invalid"])
346346
finally:
347347
if os.path.exists(tsfile_path):
348348
os.remove(tsfile_path)
349349

350350

351-
def test_to_tsfile_string_vs_blob():
352-
"""Test to_tsfile correctly distinguishes between STRING and BLOB."""
353-
tsfile_path = "test_to_tsfile_string_blob.tsfile"
351+
def test_dataframe_to_tsfile_string_vs_blob():
352+
"""Test dataframe_to_tsfile correctly distinguishes between STRING and BLOB."""
353+
tsfile_path = "test_dataframe_to_tsfile_string_blob.tsfile"
354354
try:
355355
if os.path.exists(tsfile_path):
356356
os.remove(tsfile_path)
@@ -361,7 +361,7 @@ def test_to_tsfile_string_vs_blob():
361361
'blob_col': [f"blob{i}".encode('utf-8') for i in range(20)] # Bytes
362362
})
363363

364-
to_tsfile(df, tsfile_path, table_name="test_table")
364+
dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
365365

366366
# Verify by reading back
367367
df_read = to_dataframe(tsfile_path, table_name="test_table")

python/tsfile/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,4 @@
3434
from .tsfile_writer import TsFileWriterPy as TsFileWriter
3535
from .tsfile_py_cpp import get_tsfile_config, set_tsfile_config
3636
from .tsfile_table_writer import TsFileTableWriter
37-
from .utils import to_dataframe
37+
from .utils import to_dataframe, dataframe_to_tsfile

python/tsfile/tsfile_table_writer.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,15 @@
2121
from tsfile import TsFileWriter
2222
from tsfile.constants import TSDataType
2323
from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
24-
from tsfile.utils import check_string_or_blob
24+
25+
def check_string_or_blob(ts_data_type: TSDataType, dtype, column_series: pd.Series) -> TSDataType:
    """Refine an inferred STRING type to BLOB when the column actually holds bytes.

    The check only runs when ``ts_data_type`` is ``TSDataType.STRING`` and
    ``dtype`` is an object dtype; any other combination is returned unchanged.

    Args:
        ts_data_type: The data type inferred so far for the column.
        dtype: The dtype of the column (a dtype instance or the
            ``numpy.object_`` class itself).
        column_series: The column values; the first non-null entry is inspected.

    Returns:
        ``TSDataType.BLOB`` if the first non-null value is a ``bytes`` object,
        otherwise ``ts_data_type`` unchanged (including when every value is null).
    """
    if ts_data_type == TSDataType.STRING and (dtype == 'object' or str(dtype) == "<class 'numpy.object_'>"):
        # Locate the first non-null entry by POSITION. A label lookup such as
        # ``column_series[first_valid_index()]`` yields a Series rather than a
        # scalar when the index contains duplicate labels (e.g. repeated
        # timestamps), which made the bytes check silently fail.
        valid_mask = column_series.notna().to_numpy()
        if valid_mask.any():
            first_value = column_series.iloc[int(valid_mask.argmax())]
            if isinstance(first_value, bytes):
                return TSDataType.BLOB
    return ts_data_type
2533

2634

2735
class TsFileTableWriter:

python/tsfile/utils.py

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,10 @@
2222
import pandas as pd
2323
from pandas.core.dtypes.common import is_integer_dtype
2424

25+
from tsfile import ColumnSchema, TableSchema, ColumnCategory, TSDataType
2526
from tsfile.exceptions import TableNotExistError, ColumnNotExistError
2627
from tsfile.tsfile_reader import TsFileReaderPy
27-
from tsfile import ColumnSchema, TableSchema, ColumnCategory, TSDataType, TsFileTableWriter
28-
29-
30-
def check_string_or_blob(ts_data_type: TSDataType, dtype, column_series: pd.Series) -> TSDataType:
    """Refine an inferred STRING type to BLOB when the column actually holds bytes.

    The check only runs when ``ts_data_type`` is ``TSDataType.STRING`` and
    ``dtype`` is an object dtype; any other combination is returned unchanged.

    Args:
        ts_data_type: The data type inferred so far for the column.
        dtype: The dtype of the column (a dtype instance or the
            ``numpy.object_`` class itself).
        column_series: The column values; the first non-null entry is inspected.

    Returns:
        ``TSDataType.BLOB`` if the first non-null value is a ``bytes`` object,
        otherwise ``ts_data_type`` unchanged (including when every value is null).
    """
    if ts_data_type == TSDataType.STRING and (dtype == 'object' or str(dtype) == "<class 'numpy.object_'>"):
        # Locate the first non-null entry by POSITION. A label lookup such as
        # ``column_series[first_valid_index()]`` yields a Series rather than a
        # scalar when the index contains duplicate labels (e.g. repeated
        # timestamps), which made the bytes check silently fail.
        valid_mask = column_series.notna().to_numpy()
        if valid_mask.any():
            first_value = column_series.iloc[int(valid_mask.argmax())]
            if isinstance(first_value, bytes):
                return TSDataType.BLOB
    return ts_data_type
28+
from tsfile.tsfile_table_writer import TsFileTableWriter, check_string_or_blob
3829

3930

4031
def to_dataframe(file_path: str,
@@ -174,11 +165,11 @@ def _gen(is_iterator: bool) -> Iterator[pd.DataFrame]:
174165

175166

176167
def dataframe_to_tsfile(dataframe: pd.DataFrame,
177-
file_path: str,
178-
table_name: Optional[str] = None,
179-
time_column: Optional[str] = None,
180-
tag_column: Optional[list[str]] = None,
181-
):
168+
file_path: str,
169+
table_name: Optional[str] = None,
170+
time_column: Optional[str] = None,
171+
tag_column: Optional[list[str]] = None,
172+
):
182173
"""
183174
Write a pandas DataFrame to a TsFile by inferring the table schema from the DataFrame.
184175
@@ -227,7 +218,8 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
227218
if time_column not in dataframe.columns:
228219
raise ValueError(f"Time column '{time_column}' not found in DataFrame")
229220
if not is_integer_dtype(dataframe[time_column].dtype):
230-
raise TypeError(f"Time column '{time_column}' must be integer type (int64 or int), got {dataframe[time_column].dtype}")
221+
raise TypeError(
222+
f"Time column '{time_column}' must be integer type (int64 or int), got {dataframe[time_column].dtype}")
231223
time_col_name = time_column
232224
else:
233225
for col in dataframe.columns:
@@ -236,7 +228,8 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
236228
time_col_name = col
237229
break
238230
else:
239-
raise TypeError(f"Time column '{col}' must be integer type (int64 or int), got {dataframe[col].dtype}")
231+
raise TypeError(
232+
f"Time column '{col}' must be integer type (int64 or int), got {dataframe[col].dtype}")
240233

241234
data_columns = [col for col in dataframe.columns if col != time_col_name]
242235

@@ -271,4 +264,4 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
271264
df_to_write = dataframe
272265

273266
with TsFileTableWriter(file_path, table_schema) as writer:
274-
writer.write_dataframe(df_to_write)
267+
writer.write_dataframe(df_to_write)

0 commit comments

Comments
 (0)