You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Thank you Mark for making Obsidian more accessible to Python users!! :-)
I was giving it a try with 40,076 files (incl. attachments). (Most of the MD files are generated and do not yet contain a lot of links and metadata.)
The method "gather" ran successfully in about 3 minutes! :-)
However, calling `df = vault.get_all_file_metadata()` raised an error.
Not sure if the following is of help to locate an issue.
ValueError Traceback (most recent call last)
Input In [12], in <cell line: 1>()
----> 1 df = vault.get_all_file_metadata()
File C:...\obsidiantools\api.py:1345, in Vault.get_all_file_metadata(self)
1343 warnings.warn('Only notes (md files) were used to build the graph. Set attachments=True in the connect method to show all file metadata.')
1344 else:
-> 1345 df_media = self.get_media_file_metadata()
1346 df_media['graph_category'] = np.where(
1347 df_media['file_exists'], 'attachment', 'nonexistent')
1348 df_canvas = self.get_canvas_file_metadata()
File C:...\obsidiantools\api.py:1249, in Vault._create_media_file_metadata_columns(self, df)
1242 df['abs_filepath'] = np.where(df['rel_filepath'].notna(),
1243 [self._dirpath / str(f)
1244 for f in df['rel_filepath'].tolist()],
1245 np.NaN)
1246 df['file_exists'] = pd.Series(
1247 np.logical_not(df.index.isin(self._nonexistent_media_files)),
1248 index=df.index)
-> 1249 df['n_backlinks'] = self._get_backlink_counts_for_media_files_only()
1250 df['modified_time'] = pd.to_datetime(
1251 [f.lstat().st_mtime if not pd.isna(f)
1252 else pd.NaT
1253 for f in df['abs_filepath'].tolist()],
1254 unit='s')
1255 return df
File C:...\pandas\core\frame.py:3655, in DataFrame.__setitem__(self, key, value)
3652 self._setitem_array([key], value)
3653 else:
3654 # set column
-> 3655 self._set_item(key, value)
File C:...\pandas\core\frame.py:3832, in DataFrame._set_item(self, key, value)
3822 def _set_item(self, key, value) -> None:
3823 """
3824 Add series to DataFrame in specified column.
3825
(...)
3830 ensure homogeneity.
3831 """
-> 3832 value = self._sanitize_column(value)
3834 if (
3835 key in self.columns
3836 and value.ndim == 1
3837 and not is_extension_array_dtype(value)
3838 ):
3839 # broadcast across multiple columns if necessary
3840 if not self.columns.is_unique or isinstance(self.columns, MultiIndex):
File C:...\pandas\core\common.py:557, in require_length_match(data, index)
553 """
554 Check the length of data matches the length of the index.
555 """
556 if len(data) != len(index):
--> 557 raise ValueError(
558 "Length of values "
559 f"({len(data)}) "
560 "does not match length of index "
561 f"({len(index)})"
562 )
ValueError: Length of values (38135) does not match length of index (4216)
The text was updated successfully, but these errors were encountered:
Thank you Mark for making Obsidian more accessible to Python users!! :-)
I was giving it a try with 40,076 files (incl. attachments). (Most of the MD files are generated and do not yet contain a lot of links and metadata.)
The method "gather" ran successfully in about 3 minutes! :-)
However, calling
df = vault.get_all_file_metadata()
raised an error. Not sure if the following is of help to locate an issue.
ValueError Traceback (most recent call last)
Input In [12], in <cell line: 1>()
----> 1 df = vault.get_all_file_metadata()
File C:...\obsidiantools\api.py:1345, in Vault.get_all_file_metadata(self)
1343 warnings.warn('Only notes (md files) were used to build the graph. Set attachments=True in the connect method to show all file metadata.')
1344 else:
-> 1345 df_media = self.get_media_file_metadata()
1346 df_media['graph_category'] = np.where(
1347 df_media['file_exists'], 'attachment', 'nonexistent')
1348 df_canvas = self.get_canvas_file_metadata()
File C:...\obsidiantools\api.py:1234, in Vault.get_media_file_metadata(self)
1232 return df
1233 else:
-> 1234 df = df.pipe(self._create_media_file_metadata_columns)
1235 return df
File C:...\pandas\core\generic.py:5512, in NDFrame.pipe(self, func, *args, **kwargs)
5454 @Final
5455 @doc(klass=_shared_doc_kwargs["klass"])
5456 def pipe(
(...)
5460 **kwargs,
5461 ) -> T:
5462 r"""
5463 Apply chainable functions that expect Series or DataFrames.
5464
(...)
5510 ... ) # doctest: +SKIP
5511 """
-> 5512 return com.pipe(self, func, *args, **kwargs)
File C:...\pandas\core\common.py:497, in pipe(obj, func, *args, **kwargs)
495 return func(*args, **kwargs)
496 else:
--> 497 return func(obj, *args, **kwargs)
File C:...\obsidiantools\api.py:1249, in Vault._create_media_file_metadata_columns(self, df)
1242 df['abs_filepath'] = np.where(df['rel_filepath'].notna(),
1243 [self._dirpath / str(f)
1244 for f in df['rel_filepath'].tolist()],
1245 np.NaN)
1246 df['file_exists'] = pd.Series(
1247 np.logical_not(df.index.isin(self._nonexistent_media_files)),
1248 index=df.index)
-> 1249 df['n_backlinks'] = self._get_backlink_counts_for_media_files_only()
1250 df['modified_time'] = pd.to_datetime(
1251 [f.lstat().st_mtime if not pd.isna(f)
1252 else pd.NaT
1253 for f in df['abs_filepath'].tolist()],
1254 unit='s')
1255 return df
File C:...\pandas\core\frame.py:3655, in DataFrame.__setitem__(self, key, value)
3652 self._setitem_array([key], value)
3653 else:
3654 # set column
-> 3655 self._set_item(key, value)
File C:...\pandas\core\frame.py:3832, in DataFrame._set_item(self, key, value)
3822 def _set_item(self, key, value) -> None:
3823 """
3824 Add series to DataFrame in specified column.
3825
(...)
3830 ensure homogeneity.
3831 """
-> 3832 value = self._sanitize_column(value)
3834 if (
3835 key in self.columns
3836 and value.ndim == 1
3837 and not is_extension_array_dtype(value)
3838 ):
3839 # broadcast across multiple columns if necessary
3840 if not self.columns.is_unique or isinstance(self.columns, MultiIndex):
File C:...\pandas\core\frame.py:4538, in DataFrame._sanitize_column(self, value)
4535 return _reindex_for_setitem(value, self.index)
4537 if is_list_like(value):
-> 4538 com.require_length_match(value, self.index)
4539 return sanitize_array(value, self.index, copy=True, allow_2d=True)
File C:...\pandas\core\common.py:557, in require_length_match(data, index)
553 """
554 Check the length of data matches the length of the index.
555 """
556 if len(data) != len(index):
--> 557 raise ValueError(
558 "Length of values "
559 f"({len(data)}) "
560 "does not match length of index "
561 f"({len(index)})"
562 )
ValueError: Length of values (38135) does not match length of index (4216)
The text was updated successfully, but these errors were encountered: