Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions pyreadstat/_readstat_parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -233,13 +233,13 @@ cdef object transform_datetime(py_datetime_format var_format, double tstamp, py_
return mydat.date()
elif var_format == DATE_FORMAT_DATETIME:
if output_format == "polars":
# we want to return seconds from unix
# we want to return timestamp in seconds
if file_format == FILE_FORMAT_STATA:
# tstamp is in milliseconds
return (tstamp/1000) - unix_to_origin_secs
return (tstamp/1000)
else:
# tstamp in seconds
return tstamp - unix_to_origin_secs
return tstamp

if file_format == FILE_FORMAT_STATA:
# tstamp is in milliseconds
Expand All @@ -253,7 +253,8 @@ cdef object transform_datetime(py_datetime_format var_format, double tstamp, py_
# tstamp in seconds
days = <int> (floor(tstamp / 86400))
secs = <int> (tstamp % 86400)
tdelta = timedelta_new(days, secs, 0)
usecs = <int> (round(tstamp % 1 * 1e6))
tdelta = timedelta_new(days, secs, usecs)
#tdelta = timedelta(seconds=tstamp)
mydat = origin + tdelta
return mydat
Expand All @@ -270,7 +271,8 @@ cdef object transform_datetime(py_datetime_format var_format, double tstamp, py_
# tstamp in seconds
days = <int> (floor(tstamp / 86400))
secs = <int> (tstamp % 86400)
tdelta = timedelta_new(days, secs, 0)
usecs = <int> (round(tstamp % 1 * 1e6))
tdelta = timedelta_new(days, secs, usecs)
#tdelta = timedelta(seconds=tstamp)
mydat = origin + tdelta
return mydat.time()
Expand Down Expand Up @@ -1105,7 +1107,16 @@ cdef object dict_to_dataframe(object dict_data, data_container dc):
if var_format == DATE_FORMAT_DATE:
date_cols.append(column)
if datetime_cols:
data_frame = data_frame.with_columns(pl.from_epoch(pl.col(*datetime_cols), time_unit='s'))
data_frame = data_frame.with_columns(
[
pl.from_epoch(
(pl.col(c) % 1 * 1e6).round().cast(pl.Int64) + (
pl.col(c).floor() * 1e6).cast(pl.Int64) - (
pl.lit(dc.unix_to_origin_secs) * 1e6).cast(pl.Int64),
time_unit='us')
for c in datetime_cols if data_frame[c].len() > 0
]
)
if date_cols:
data_frame = data_frame.with_columns(pl.from_epoch(pl.col(*date_cols), time_unit='d'))

Expand Down
101 changes: 101 additions & 0 deletions test_data/basic/fractional_seconds.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
date,dtime,time
1993-06-10,1993-06-10T02:04:01.122463,02:04:01.122463
2147-07-18,2147-07-18T02:16:02.883684,02:16:02.883684
1739-04-22,1739-04-22T13:32:08.170115,13:32:08.170115
2187-12-07,2187-12-07T15:41:41.567238,15:41:41.567238
2278-04-20,2278-04-20T12:41:18.331215,12:41:18.331215
2181-09-03,2181-09-03T16:20:48.795826,16:20:48.795826
1992-01-21,1992-01-21T04:27:25.154369,04:27:25.154369
1829-09-11,1829-09-11T10:47:37.282617,10:47:37.282618
2294-10-29,2294-10-29T14:41:18.574982,14:41:18.574982
1920-04-17,1920-04-17T16:31:07.566722,16:31:07.566723
1868-07-28,1868-07-28T06:16:21.620745,06:16:21.620745
2208-03-23,2208-03-23T05:13:20.026692,05:13:20.026692
1929-05-15,1929-05-15T18:58:37.253180,18:58:37.253180
1998-08-26,1998-08-26T02:03:58.743517,02:03:58.743517
2122-11-24,2122-11-24T23:01:29.367663,23:01:29.367663
1854-08-25,1854-08-25T01:56:49.077793,01:56:49.077793
1936-01-09,1936-01-09T15:41:42.922773,15:41:42.922773
1967-11-09,1967-11-09T15:23:02.071943,15:23:02.071943
1703-11-02,1703-11-02T14:27:03.782796,14:27:03.782796
1750-11-28,1750-11-28T08:52:49.219013,08:52:49.219013
1734-07-09,1734-07-09T23:47:14.951282,23:47:14.951282
2157-07-31,2157-07-31T21:52:15.157284,21:52:15.157284
1950-01-07,1950-01-07T14:53:48.730921,14:53:48.730921
1930-05-11,1930-05-11T03:24:48.220010,03:24:48.220011
1772-06-29,1772-06-29T11:31:57.032263,11:31:57.032263
2207-11-02,2207-11-02T16:23:11.818488,16:23:11.818488
2275-05-17,2275-05-17T19:13:17.300388,19:13:17.300388
1720-01-04,1720-01-04T17:34:54.322509,17:34:54.322509
2273-01-24,2273-01-24T14:14:38.193558,14:14:38.193558
1834-11-01,1834-11-01T06:04:17.738995,06:04:17.738995
2266-10-02,2266-10-02T10:05:04.330017,10:05:04.330017
1983-11-26,1983-11-26T12:30:57.127726,12:30:57.127726
1967-11-22,1967-11-22T19:10:31.922508,19:10:31.922508
1772-12-29,1772-12-29T17:17:40.509609,17:17:40.509609
1920-08-05,1920-08-05T09:14:58.541695,09:14:58.541695
2237-04-05,2237-04-05T11:10:54.366266,11:10:54.366266
2285-04-22,2285-04-22T07:20:52.103394,07:20:52.103394
1790-03-02,1790-03-02T11:20:35.978824,11:20:35.978824
2222-09-28,2222-09-28T23:32:00.371672,23:32:00.371672
2020-06-02,2020-06-02T11:24:03.381682,11:24:03.381682
1942-09-06,1942-09-06T22:41:22.091431,22:41:22.091431
1734-04-01,1734-04-01T02:06:18.583556,02:06:18.583556
1710-04-28,1710-04-28T10:05:23.561800,10:05:23.561800
1920-02-28,1920-02-28T14:46:28.076923,14:46:28.076923
2059-01-28,2059-01-28T10:47:03.890320,10:47:03.890320
1739-04-24,1739-04-24T22:59:28.010889,22:59:28.010889
1911-07-10,1911-07-10T08:28:09.542829,08:28:09.542830
2044-05-13,2044-05-13T15:42:33.791598,15:42:33.791598
2281-02-02,2281-02-02T05:26:26.257008,05:26:26.257008
1968-03-27,1968-03-27T22:34:47.154648,22:34:47.154648
2237-11-09,2237-11-09T06:38:59.895151,06:38:59.895151
1891-11-11,1891-11-11T03:51:21.527200,03:51:21.527201
1769-05-08,1769-05-08T22:58:51.374555,22:58:51.374555
2011-02-23,2011-02-23T20:37:24.734829,20:37:24.734829
2211-07-10,2211-07-10T05:06:31.123780,05:06:31.123780
2033-03-22,2033-03-22T06:28:12.726483,06:28:12.726483
1824-10-31,1824-10-31T20:41:51.630083,20:41:51.630083
1742-04-02,1742-04-02T22:15:15.392728,22:15:15.392728
2286-01-29,2286-01-29T13:07:02.468262,13:07:02.468262
1884-03-12,1884-03-12T07:45:14.166535,07:45:14.166534
2096-04-26,2096-04-26T22:00:08.409092,22:00:08.409092
1923-12-02,1923-12-02T13:15:39.641922,13:15:39.641922
1723-06-12,1723-06-12T16:43:33.592489,16:43:33.592489
2134-11-18,2134-11-18T06:18:01.839232,06:18:01.839232
2269-02-20,2269-02-20T00:25:17.618843,00:25:17.618843
1854-04-22,1854-04-22T05:25:48.059167,05:25:48.059166
1968-07-13,1968-07-13T22:57:57.493756,22:57:57.493756
1977-01-08,1977-01-08T09:30:29.495407,09:30:29.495407
2010-12-19,2010-12-19T11:18:26.461924,11:18:26.461924
2163-09-13,2163-09-13T18:33:24.278122,18:33:24.278122
1730-01-15,1730-01-15T21:39:25.275543,21:39:25.275543
1978-09-24,1978-09-24T23:11:39.162304,23:11:39.162304
2224-02-08,2224-02-08T15:45:01.422703,15:45:01.422703
1787-02-04,1787-02-04T11:01:01.320380,11:01:01.320380
1964-01-18,1964-01-18T06:23:14.746125,06:23:14.746125
1788-12-09,1788-12-09T09:30:16.346816,09:30:16.346816
2290-09-07,2290-09-07T01:57:16.982105,01:57:16.982105
2035-02-14,2035-02-14T23:34:56.107008,23:34:56.107008
1905-02-18,1905-02-18T19:29:49.899169,19:29:49.899170
2219-10-01,2219-10-01T20:28:45.250220,20:28:45.250220
1781-08-02,1781-08-02T22:07:25.500849,22:07:25.500849
1820-07-20,1820-07-20T13:07:18.717742,13:07:18.717742
2171-03-31,2171-03-31T13:50:34.930294,13:50:34.930294
2148-06-30,2148-06-30T07:32:48.692223,07:32:48.692223
2093-03-04,2093-03-04T14:22:35.691149,14:22:35.691149
2219-01-17,2219-01-17T20:32:47.025956,20:32:47.025956
2032-06-27,2032-06-27T02:51:01.604809,02:51:01.604809
1894-04-03,1894-04-03T00:15:54.122685,00:15:54.122684
1960-02-06,1960-02-06T14:44:53.032016,14:44:53.032016
1955-10-09,1955-10-09T18:14:44.323325,18:14:44.323324
2195-06-21,2195-06-21T22:32:46.631441,22:32:46.631441
1919-12-11,1919-12-11T19:11:02.762172,19:11:02.762173
1741-05-21,1741-05-21T18:16:05.399772,18:16:05.399772
1971-10-11,1971-10-11T00:14:23.048366,00:14:23.048366
1810-04-22,1810-04-22T13:43:24.608671,13:43:24.608671
2198-05-30,2198-05-30T12:31:55.632376,12:31:55.632376
1774-04-27,1774-04-27T07:31:02.189986,07:31:02.189986
2205-05-18,2205-05-18T10:39:57.032547,10:39:57.032547
1875-08-15,1875-08-15T16:15:21.807336,16:15:21.807335
2074-07-21,2074-07-21T08:35:02.723811,08:35:02.723811
Binary file added test_data/basic/fractional_seconds.sas7bdat
Binary file not shown.
22 changes: 21 additions & 1 deletion tests/test_narwhalified.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,22 @@ def _prepare_data(self):
self.df_sas_dates = df_dates2.to_native()
#schema = {"date": nw.Date, "dtime": nw.Datetime("ns"), "time": nw.Time()}
self.df_sas_dates2 = nw.concat([df_dates2, nw.from_dict({"date":[None], "dtime":[None], "time":[None]}, backend=backend)]).to_native() #, schema=schema

# datetime and time variables with fractional seconds as well as unusual date, time and datetime formats
sas_fractional_seconds = os.path.join(self.basic_data_folder, "fractional_seconds.csv")
if backend == "polars":
kwds["try_parse_dates"] = True
df_fractional_seconds_raw = nw.read_csv(sas_fractional_seconds,backend=backend, **kwds)
df_fractional_seconds1 = df_fractional_seconds_raw.clone()
df_fractional_seconds1 = df_fractional_seconds1.to_native()
if backend == "pandas":
df_fractional_seconds1["date"] = pd.to_datetime(df_fractional_seconds1["date"])
df_fractional_seconds1["date"] = df_fractional_seconds1["date"].apply(lambda x: x.date())
df_fractional_seconds1["dtime"] = pd.to_datetime(df_fractional_seconds1["dtime"])
df_fractional_seconds1["time"] = pd.to_datetime(df_fractional_seconds1["time"], format='%H:%M:%S.%f')
df_fractional_seconds1["time"] = df_fractional_seconds1["time"].apply(lambda x: x.time())
self.df_sas_fractional_seconds = df_fractional_seconds1

# character column with nan and object column with nan (object pyreadstat writer doesn't know what to do with)
if backend == "pandas":
self.df_charnan = pd.DataFrame([[0,np.nan,np.nan],[1,"test", timedelta]], columns = ["integer", "string", "object"])
Expand Down Expand Up @@ -574,7 +590,11 @@ def test_sas_dates_as_pandas(self):
sas_file = os.path.join(self.basic_data_folder, "dates.sas7bdat")
df_sas, meta = pyreadstat.read_sas7bdat(sas_file, dates_as_pandas_datetime=True, output_format=self.backend)
self.assertTrue(df_sas.equals(self.df_sas_dates_as_pandas))


def test_sas_fractional_seconds(self):
    """Check that fractional_seconds.sas7bdat is read into the expected frame.

    The file is read with the backend under test and the result is compared
    against ``self.df_sas_fractional_seconds``.
    """
    fixture_path = os.path.join(
        self.basic_data_folder, "fractional_seconds.sas7bdat"
    )
    # The metadata half of the returned pair is not inspected by this test.
    loaded_frame, _metadata = pyreadstat.read_sas7bdat(
        fixture_path, output_format=self.backend
    )
    frames_match = loaded_frame.equals(self.df_sas_fractional_seconds)
    self.assertTrue(frames_match)


def test_sas_user_missing(self):
Expand Down
Loading