get_annotations() error
Hi @oliskir
I'm trying to filter annotations by label and then extract them into a pandas DataFrame, but I'm getting a ValueError (`ValueError: Trying to coerce float values to integers`). See the code and traceback below.
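Maybe there's a mismatch between the dtype of the min_freq_hz values stored in the SQLite db and the dtype that get_annotations() asks pandas for? The traceback points at the `freq_min_hz` column, which is built with `dtype="int"`, so any non-integer float in that column would trigger this error.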
conn = sqlite3.connect("hallo-aadb.sqlite")
indices = kdb.filter_annotation(conn, source_type=("SRKW", "S16"))
df = kdb.get_annotations(conn, indices)
ValueError Traceback (most recent call last)
Input In [5], in <cell line: 1>()
----> 1 df = kdb.get_annotations(conn, indices)
File ~/.pyenv/versions/3.10.4/envs/ketosam-env/lib/python3.10/site-packages/ketosam/db.py:1017, in get_annotations(conn, indices, ketos, tentative, top, ketos_v3)
1001 data[i].append(v)
1003 # collect in a Pandas DataFrame
1004 annot_tbl = pd.DataFrame(
1005 {
1006 "job_id": pd.Series(data[0], dtype="int"),
1007 "deployment_id": pd.Series(data[1], dtype="int"),
1008 "file_id": pd.Series(data[2], dtype="int"),
1009 "sound_source": pd.Series(data[3], dtype="str"),
1010 "sound_type": pd.Series(data[4], dtype="str"),
1011 "tentative_sound_source": pd.Series(data[5], dtype="str"),
1012 "tentative_sound_type": pd.Series(data[6], dtype="str"),
1013 "tag_id": pd.Series(data[7], dtype="object"),
1014 "start_utc": pd.Series(data[8], dtype="datetime64[ns]"),
1015 "duration_ms": pd.Series(data[9], dtype="float").astype("int"),
1016 "start_ms": pd.Series(data[10], dtype="float").astype("int"),
-> 1017 "freq_min_hz": pd.Series(data[11], dtype="int"),
1018 "freq_max_hz": pd.Series(data[12], dtype="int"),
1019 "channel": pd.Series(data[13], dtype="int"),
1020 "granularity": pd.Series(data[14], dtype="str"),
1021 "machine_prediction": pd.Series(data[15], dtype="object"),
1022 "comments": pd.Series(data[16], dtype="str"),
1023 "filename": pd.Series(data[17], dtype="str"),
1024 "relative_path": pd.Series(data[18], dtype="str"),
1025 "top_path": pd.Series(data[19], dtype="str"),
1026 "num_files": pd.Series(data[20], dtype="int"),
1027 "file_id_list": pd.Series(data[21], dtype="object"),
1028 }
1029 )
1031 # convert tag IDs to tag names
1032 rows = c.execute(f"SELECT id,name FROM tag").fetchall()
File ~/.pyenv/versions/3.10.4/envs/ketosam-env/lib/python3.10/site-packages/pandas/core/series.py:584, in Series.__init__(self, data, index, dtype, name, copy, fastpath)
582 data = data.copy()
583 else:
--> 584 data = sanitize_array(data, index, dtype, copy)
586 manager = _get_option("mode.data_manager", silent=True)
587 if manager == "block":
File ~/.pyenv/versions/3.10.4/envs/ketosam-env/lib/python3.10/site-packages/pandas/core/construction.py:648, in sanitize_array(data, index, dtype, copy, allow_2d)
645 subarr = np.array([], dtype=np.float64)
647 elif dtype is not None:
--> 648 subarr = _try_cast(data, dtype, copy)
650 else:
651 subarr = maybe_convert_platform(data)
File ~/.pyenv/versions/3.10.4/envs/ketosam-env/lib/python3.10/site-packages/pandas/core/construction.py:812, in _try_cast(arr, dtype, copy)
807 # GH#15832: Check if we are requesting a numeric dtype and
808 # that we can convert the data to the requested dtype.
809 elif dtype.kind in "iu":
810 # this will raise if we have e.g. floats
--> 812 subarr = maybe_cast_to_integer_array(arr, dtype)
813 else:
814 subarr = np.array(arr, dtype=dtype, copy=copy)
File ~/.pyenv/versions/3.10.4/envs/ketosam-env/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1699, in maybe_cast_to_integer_array(arr, dtype)
1695 if not np.isfinite(arr).all():
1696 raise IntCastingNaNError(
1697 "Cannot convert non-finite values (NA or inf) to integer"
1698 )
-> 1699 raise ValueError("Trying to coerce float values to integers")
1700 if arr.dtype == object:
1701 raise ValueError("Trying to coerce float values to integers")
ValueError: Trying to coerce float values to integers