diff --git a/pyreadstat/_readstat_parser.pyx b/pyreadstat/_readstat_parser.pyx index 12dd1e3..802fd4c 100644 --- a/pyreadstat/_readstat_parser.pyx +++ b/pyreadstat/_readstat_parser.pyx @@ -233,13 +233,13 @@ cdef object transform_datetime(py_datetime_format var_format, double tstamp, py_ return mydat.date() elif var_format == DATE_FORMAT_DATETIME: if output_format == "polars": - # we want to return seconds from unix + # we want to return timestamp in seconds if file_format == FILE_FORMAT_STATA: # tstamp is in millisecons - return (tstamp/1000) - unix_to_origin_secs + return (tstamp/1000) else: # tstamp in seconds - return tstamp - unix_to_origin_secs + return tstamp if file_format == FILE_FORMAT_STATA: # tstamp is in millisecons @@ -253,7 +253,8 @@ cdef object transform_datetime(py_datetime_format var_format, double tstamp, py_ # tstamp in seconds days = (floor(tstamp / 86400)) secs = (tstamp % 86400) - tdelta = timedelta_new(days, secs, 0) + usecs = (round(tstamp % 1 * 1e6)) + tdelta = timedelta_new(days, secs, usecs) #tdelta = timedelta(seconds=tstamp) mydat = origin + tdelta return mydat @@ -270,7 +271,8 @@ cdef object transform_datetime(py_datetime_format var_format, double tstamp, py_ # tstamp in seconds days = (floor(tstamp / 86400)) secs = (tstamp % 86400) - tdelta = timedelta_new(days, secs, 0) + usecs = (round(tstamp % 1 * 1e6)) + tdelta = timedelta_new(days, secs, usecs) #tdelta = timedelta(seconds=tstamp) mydat = origin + tdelta return mydat.time() @@ -1105,7 +1107,16 @@ cdef object dict_to_dataframe(object dict_data, data_container dc): if var_format == DATE_FORMAT_DATE: date_cols.append(column) if datetime_cols: - data_frame = data_frame.with_columns(pl.from_epoch(pl.col(*datetime_cols), time_unit='s')) + data_frame = data_frame.with_columns( + [ + pl.from_epoch( + (pl.col(c) % 1 * 1e6).round().cast(pl.Int64) + ( + pl.col(c).floor() * 1e6).cast(pl.Int64) - ( + pl.lit(dc.unix_to_origin_secs) * 1e6).cast(pl.Int64), + time_unit='us') + for c in datetime_cols if data_frame[c].len() > 0 + ] + ) if date_cols: data_frame = data_frame.with_columns(pl.from_epoch(pl.col(*date_cols), time_unit='d')) diff --git a/test_data/basic/fractional_seconds.csv b/test_data/basic/fractional_seconds.csv new file mode 100755 index 0000000..512d867 --- /dev/null +++ b/test_data/basic/fractional_seconds.csv @@ -0,0 +1,101 @@ +date,dtime,time +1993-06-10,1993-06-10T02:04:01.122463,02:04:01.122463 +2147-07-18,2147-07-18T02:16:02.883684,02:16:02.883684 +1739-04-22,1739-04-22T13:32:08.170115,13:32:08.170115 +2187-12-07,2187-12-07T15:41:41.567238,15:41:41.567238 +2278-04-20,2278-04-20T12:41:18.331215,12:41:18.331215 +2181-09-03,2181-09-03T16:20:48.795826,16:20:48.795826 +1992-01-21,1992-01-21T04:27:25.154369,04:27:25.154369 +1829-09-11,1829-09-11T10:47:37.282617,10:47:37.282618 +2294-10-29,2294-10-29T14:41:18.574982,14:41:18.574982 +1920-04-17,1920-04-17T16:31:07.566722,16:31:07.566723 +1868-07-28,1868-07-28T06:16:21.620745,06:16:21.620745 +2208-03-23,2208-03-23T05:13:20.026692,05:13:20.026692 +1929-05-15,1929-05-15T18:58:37.253180,18:58:37.253180 +1998-08-26,1998-08-26T02:03:58.743517,02:03:58.743517 +2122-11-24,2122-11-24T23:01:29.367663,23:01:29.367663 +1854-08-25,1854-08-25T01:56:49.077793,01:56:49.077793 +1936-01-09,1936-01-09T15:41:42.922773,15:41:42.922773 +1967-11-09,1967-11-09T15:23:02.071943,15:23:02.071943 +1703-11-02,1703-11-02T14:27:03.782796,14:27:03.782796 +1750-11-28,1750-11-28T08:52:49.219013,08:52:49.219013 +1734-07-09,1734-07-09T23:47:14.951282,23:47:14.951282 +2157-07-31,2157-07-31T21:52:15.157284,21:52:15.157284 +1950-01-07,1950-01-07T14:53:48.730921,14:53:48.730921 +1930-05-11,1930-05-11T03:24:48.220010,03:24:48.220011 +1772-06-29,1772-06-29T11:31:57.032263,11:31:57.032263 +2207-11-02,2207-11-02T16:23:11.818488,16:23:11.818488 +2275-05-17,2275-05-17T19:13:17.300388,19:13:17.300388 +1720-01-04,1720-01-04T17:34:54.322509,17:34:54.322509 +2273-01-24,2273-01-24T14:14:38.193558,14:14:38.193558 +1834-11-01,1834-11-01T06:04:17.738995,06:04:17.738995 +2266-10-02,2266-10-02T10:05:04.330017,10:05:04.330017 +1983-11-26,1983-11-26T12:30:57.127726,12:30:57.127726 +1967-11-22,1967-11-22T19:10:31.922508,19:10:31.922508 +1772-12-29,1772-12-29T17:17:40.509609,17:17:40.509609 +1920-08-05,1920-08-05T09:14:58.541695,09:14:58.541695 +2237-04-05,2237-04-05T11:10:54.366266,11:10:54.366266 +2285-04-22,2285-04-22T07:20:52.103394,07:20:52.103394 +1790-03-02,1790-03-02T11:20:35.978824,11:20:35.978824 +2222-09-28,2222-09-28T23:32:00.371672,23:32:00.371672 +2020-06-02,2020-06-02T11:24:03.381682,11:24:03.381682 +1942-09-06,1942-09-06T22:41:22.091431,22:41:22.091431 +1734-04-01,1734-04-01T02:06:18.583556,02:06:18.583556 +1710-04-28,1710-04-28T10:05:23.561800,10:05:23.561800 +1920-02-28,1920-02-28T14:46:28.076923,14:46:28.076923 +2059-01-28,2059-01-28T10:47:03.890320,10:47:03.890320 +1739-04-24,1739-04-24T22:59:28.010889,22:59:28.010889 +1911-07-10,1911-07-10T08:28:09.542829,08:28:09.542830 +2044-05-13,2044-05-13T15:42:33.791598,15:42:33.791598 +2281-02-02,2281-02-02T05:26:26.257008,05:26:26.257008 +1968-03-27,1968-03-27T22:34:47.154648,22:34:47.154648 +2237-11-09,2237-11-09T06:38:59.895151,06:38:59.895151 +1891-11-11,1891-11-11T03:51:21.527200,03:51:21.527201 +1769-05-08,1769-05-08T22:58:51.374555,22:58:51.374555 +2011-02-23,2011-02-23T20:37:24.734829,20:37:24.734829 +2211-07-10,2211-07-10T05:06:31.123780,05:06:31.123780 +2033-03-22,2033-03-22T06:28:12.726483,06:28:12.726483 +1824-10-31,1824-10-31T20:41:51.630083,20:41:51.630083 +1742-04-02,1742-04-02T22:15:15.392728,22:15:15.392728 +2286-01-29,2286-01-29T13:07:02.468262,13:07:02.468262 +1884-03-12,1884-03-12T07:45:14.166535,07:45:14.166534 +2096-04-26,2096-04-26T22:00:08.409092,22:00:08.409092 +1923-12-02,1923-12-02T13:15:39.641922,13:15:39.641922 +1723-06-12,1723-06-12T16:43:33.592489,16:43:33.592489 +2134-11-18,2134-11-18T06:18:01.839232,06:18:01.839232 +2269-02-20,2269-02-20T00:25:17.618843,00:25:17.618843 +1854-04-22,1854-04-22T05:25:48.059167,05:25:48.059166 +1968-07-13,1968-07-13T22:57:57.493756,22:57:57.493756 +1977-01-08,1977-01-08T09:30:29.495407,09:30:29.495407 +2010-12-19,2010-12-19T11:18:26.461924,11:18:26.461924 +2163-09-13,2163-09-13T18:33:24.278122,18:33:24.278122 +1730-01-15,1730-01-15T21:39:25.275543,21:39:25.275543 +1978-09-24,1978-09-24T23:11:39.162304,23:11:39.162304 +2224-02-08,2224-02-08T15:45:01.422703,15:45:01.422703 +1787-02-04,1787-02-04T11:01:01.320380,11:01:01.320380 +1964-01-18,1964-01-18T06:23:14.746125,06:23:14.746125 +1788-12-09,1788-12-09T09:30:16.346816,09:30:16.346816 +2290-09-07,2290-09-07T01:57:16.982105,01:57:16.982105 +2035-02-14,2035-02-14T23:34:56.107008,23:34:56.107008 +1905-02-18,1905-02-18T19:29:49.899169,19:29:49.899170 +2219-10-01,2219-10-01T20:28:45.250220,20:28:45.250220 +1781-08-02,1781-08-02T22:07:25.500849,22:07:25.500849 +1820-07-20,1820-07-20T13:07:18.717742,13:07:18.717742 +2171-03-31,2171-03-31T13:50:34.930294,13:50:34.930294 +2148-06-30,2148-06-30T07:32:48.692223,07:32:48.692223 +2093-03-04,2093-03-04T14:22:35.691149,14:22:35.691149 +2219-01-17,2219-01-17T20:32:47.025956,20:32:47.025956 +2032-06-27,2032-06-27T02:51:01.604809,02:51:01.604809 +1894-04-03,1894-04-03T00:15:54.122685,00:15:54.122684 +1960-02-06,1960-02-06T14:44:53.032016,14:44:53.032016 +1955-10-09,1955-10-09T18:14:44.323325,18:14:44.323324 +2195-06-21,2195-06-21T22:32:46.631441,22:32:46.631441 +1919-12-11,1919-12-11T19:11:02.762172,19:11:02.762173 +1741-05-21,1741-05-21T18:16:05.399772,18:16:05.399772 +1971-10-11,1971-10-11T00:14:23.048366,00:14:23.048366 +1810-04-22,1810-04-22T13:43:24.608671,13:43:24.608671 +2198-05-30,2198-05-30T12:31:55.632376,12:31:55.632376 +1774-04-27,1774-04-27T07:31:02.189986,07:31:02.189986 +2205-05-18,2205-05-18T10:39:57.032547,10:39:57.032547 +1875-08-15,1875-08-15T16:15:21.807336,16:15:21.807335 +2074-07-21,2074-07-21T08:35:02.723811,08:35:02.723811 diff --git a/test_data/basic/fractional_seconds.sas7bdat b/test_data/basic/fractional_seconds.sas7bdat new file mode 100755 index 0000000..981a8a0 Binary files /dev/null and b/test_data/basic/fractional_seconds.sas7bdat differ diff --git a/tests/test_narwhalified.py b/tests/test_narwhalified.py index 9721a1d..01cfb0c 100644 --- a/tests/test_narwhalified.py +++ b/tests/test_narwhalified.py @@ -171,6 +171,22 @@ def _prepare_data(self): self.df_sas_dates = df_dates2.to_native() #schema = {"date": nw.Date, "dtime": nw.Datetime("ns"), "time": nw.Time()} self.df_sas_dates2 = nw.concat([df_dates2, nw.from_dict({"date":[None], "dtime":[None], "time":[None]}, backend=backend)]).to_native() #, schema=schema + + # datetime and time variables with fractional seconds as well as unusual date, time and datetime formats + sas_fractional_seconds = os.path.join(self.basic_data_folder, "fractional_seconds.csv") + if backend == "polars": + kwds["try_parse_dates"] = True + df_fractional_seconds_raw = nw.read_csv(sas_fractional_seconds,backend=backend, **kwds) + df_fractional_seconds1 = df_fractional_seconds_raw.clone() + df_fractional_seconds1 = df_fractional_seconds1.to_native() + if backend == "pandas": + df_fractional_seconds1["date"] = pd.to_datetime(df_fractional_seconds1["date"]) + df_fractional_seconds1["date"] = df_fractional_seconds1["date"].apply(lambda x: x.date()) + df_fractional_seconds1["dtime"] = pd.to_datetime(df_fractional_seconds1["dtime"]) + df_fractional_seconds1["time"] = pd.to_datetime(df_fractional_seconds1["time"], format='%H:%M:%S.%f') + df_fractional_seconds1["time"] = df_fractional_seconds1["time"].apply(lambda x: x.time()) + self.df_sas_fractional_seconds = df_fractional_seconds1 + # character column with nan and object column with nan (object pyreadstat writer doesn't know what to do with) if backend == "pandas": self.df_charnan = pd.DataFrame([[0,np.nan,np.nan],[1,"test", timedelta]], columns = ["integer", "string", "object"]) @@ -574,7 +590,11 @@ def test_sas_dates_as_pandas(self): sas_file = os.path.join(self.basic_data_folder, "dates.sas7bdat") df_sas, meta = pyreadstat.read_sas7bdat(sas_file, dates_as_pandas_datetime=True, output_format=self.backend) self.assertTrue(df_sas.equals(self.df_sas_dates_as_pandas)) - + + def test_sas_fractional_seconds(self): + sas_file = os.path.join(self.basic_data_folder, "fractional_seconds.sas7bdat") + df_sas, meta = pyreadstat.read_sas7bdat(sas_file, output_format=self.backend) + self.assertTrue(df_sas.equals(self.df_sas_fractional_seconds)) def test_sas_user_missing(self):