Fix bug in RT of parquet detection #278

Merged
merged 5 commits into from
Nov 4, 2024
Merged
docs/usage.md: 8 changes (4 additions & 4 deletions)

@@ -385,13 +385,13 @@ Currently you can only serialize in-memory variables to kerchunk references if t
When you have many chunks, the reference file can get large enough to be unwieldy as json. In that case the references can instead be stored as parquet. Again this uses kerchunk internally.

```diff
-combined_vds.virtualize.to_kerchunk('combined.parq', format='parquet')
+combined_vds.virtualize.to_kerchunk('combined.parquet', format='parquet')
```

And again we can read these references using the "kerchunk" backend as if it were a regular Zarr store:

```diff
-combined_ds = xr.open_dataset('combined.parq', engine="kerchunk")
+combined_ds = xr.open_dataset('combined.parquet', engine="kerchunk")
```

By default, references are split across separate parquet files when the total number of references exceeds `record_size`. If a particular variable references fewer than `categorical_threshold` unique urls, the urls will be stored as a categorical variable.
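As an illustration, here is a sketch of passing these options when writing parquet references. It assumes `to_kerchunk` accepts `record_size` and `categorical_threshold` keywords and forwards them to kerchunk; check the accessor signature in your installed version.

```python
# Sketch (assumed keywords): control how references are sharded and encoded.
combined_vds.virtualize.to_kerchunk(
    'combined.parquet',
    format='parquet',
    record_size=100_000,       # assumed: max references per parquet record file
    categorical_threshold=10,  # assumed: categorical-encode urls below this count
)
```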
@@ -444,9 +444,9 @@ You can open existing Kerchunk `json` or `parquet` references as Virtualizarr vi

```diff
-vds = open_virtual_dataset('combined.json', format='kerchunk')
+vds = open_virtual_dataset('combined.json', filetype='kerchunk', indexes={})
 # or
-vds = open_virtual_dataset('combined.parquet', format='kerchunk')
+vds = open_virtual_dataset('combined.parquet', filetype='kerchunk', indexes={})
```
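Putting the two snippets together, the parquet round trip (the "RT" in this PR's title) looks roughly like this; `combined_vds` is the virtual dataset from the docs above:

```python
# Round-trip sketch: write parquet references, then reopen them as a virtual dataset.
from virtualizarr import open_virtual_dataset

combined_vds.virtualize.to_kerchunk('combined.parquet', format='parquet')
vds = open_virtual_dataset('combined.parquet', filetype='kerchunk', indexes={})
```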

virtualizarr/readers/kerchunk.py: 6 changes (4 additions & 2 deletions)
@@ -38,7 +38,9 @@
```diff
     fs = _FsspecFSFromFilepath(filepath=filepath, reader_options=reader_options)

     # The kerchunk .parquet storage format isn't actually a parquet, but a directory that contains named parquets for each group/variable.
-    if fs.filepath.endswith("ref.parquet"):
+    if fs.filepath.endswith(".parquet") and fs.fs.isfile(
+        f"{fs.filepath}/.zmetadata"
+    ):
         from fsspec.implementations.reference import LazyReferenceMapper

         lrm = LazyReferenceMapper(filepath, fs.fs)
```

Codecov / codecov/patch warning on virtualizarr/readers/kerchunk.py#L41: added line was not covered by tests.
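For context, the detection rule added above can be expressed as a standalone check with plain fsspec; this is a minimal sketch (the helper name is illustrative, not part of the library):

```python
import fsspec

def looks_like_kerchunk_parquet(filepath: str) -> bool:
    # A kerchunk "parquet" store is a directory whose name ends in .parquet
    # and which contains a .zmetadata file alongside per-variable parquets.
    fs, path = fsspec.core.url_to_fs(filepath)
    return path.endswith(".parquet") and fs.isfile(f"{path}/.zmetadata")
```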
@@ -61,7 +63,7 @@

```diff
     else:
         raise ValueError(
-            "The input Kerchunk reference did not seem to be in Kerchunk's JSON or Parquet spec: https://fsspec.github.io/kerchunk/spec.html. The Kerchunk format autodetection is quite flaky, so if your reference matches the Kerchunk spec feel free to open an issue: https://github.com/zarr-developers/VirtualiZarr/issues"
+            "The input Kerchunk reference did not seem to be in Kerchunk's JSON or Parquet spec: https://fsspec.github.io/kerchunk/spec.html. If your Kerchunk generated reference file is in parquet, make sure the file extension is `.parquet`. The Kerchunk format autodetection is quite flaky, so if your reference matches the Kerchunk spec feel free to open an issue: https://github.com/zarr-developers/VirtualiZarr/issues"
         )

     # TODO would be more efficient to drop these before converting them into ManifestArrays, i.e. drop them from the kerchunk refs dict
```