Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

InternalServerError while writing large json data. #429

Open
KeerthiYandaOS opened this issue Sep 28, 2023 · 0 comments
Open

InternalServerError while writing large json data. #429

KeerthiYandaOS opened this issue Sep 28, 2023 · 0 comments

Comments

@KeerthiYandaOS
Copy link
Contributor

KeerthiYandaOS commented Sep 28, 2023

While writing JSON data with a large number of records (dataframe.size() = 4263942) to an abfss path, I am seeing an Internal Server Error from the adlfs package while it processes the chunks of data. The error is consistent, not a transient glitch. A bare "Internal Server Error" isn't helpful in this case. Can you please help look into the issue and, if this is a valid failing scenario, surface the actual error message so that the failure can be understood better?

Error:

File ~/cluster-env/trident_env/lib/python3.10/site-packages/adlfs/spec.py:2083, in AzureBlobFile._async_upload_chunk(self, final, **kwargs)
   2079                 await bc.commit_block_list(
   2080                     block_list=block_list, metadata=self.metadata
   2081                 )
   2082         else:
-> 2083             raise RuntimeError(f"Failed to upload block{e}!") from e
   2084 elif self.mode == "ab":
   2085     async with self.container_client.get_blob_client(blob=self.blob) as bc:

RuntimeError: Failed to upload blockInternal Server Error

Full Stacktrace:

RuntimeError                              Traceback (most recent call last)
Cell In[35], line 86
---> 86 df.to_json(filenameActivity,orient="records",lines=True)
     87 print("File has been saved as", filenameActivity)

File ~/cluster-env/trident_env/lib/python3.10/site-packages/pandas/core/generic.py:2650, in NDFrame.to_json(self, path_or_buf, orient, date_format, double_precision, force_ascii, date_unit, default_handler, lines, compression, index, indent, storage_options)
   2647 config.is_nonnegative_int(indent)
   2648 indent = indent or 0
-> 2650 return json.to_json(
   2651     path_or_buf=path_or_buf,
   2652     obj=self,
   2653     orient=orient,
   2654     date_format=date_format,
   2655     double_precision=double_precision,
   2656     force_ascii=force_ascii,
   2657     date_unit=date_unit,
   2658     default_handler=default_handler,
   2659     lines=lines,
   2660     compression=compression,
   2661     index=index,
   2662     indent=indent,
   2663     storage_options=storage_options,
   2664 )

File ~/cluster-env/trident_env/lib/python3.10/site-packages/pandas/io/json/_json.py:178, in to_json(path_or_buf, obj, orient, date_format, double_precision, force_ascii, date_unit, default_handler, lines, compression, index, indent, storage_options)
    174     s = convert_to_line_delimits(s)
    176 if path_or_buf is not None:
    177     # apply compression and byte/text conversion
--> 178     with get_handle(
    179         path_or_buf, "w", compression=compression, storage_options=storage_options
    180     ) as handles:
    181         handles.handle.write(s)
    182 else:

File ~/cluster-env/trident_env/lib/python3.10/site-packages/pandas/io/common.py:133, in IOHandles.__exit__(self, *args)
    132 def __exit__(self, *args: Any) -> None:
--> 133     self.close()

File ~/cluster-env/trident_env/lib/python3.10/site-packages/pandas/io/common.py:125, in IOHandles.close(self)
    123     self.created_handles.remove(self.handle)
    124 for handle in self.created_handles:
--> 125     handle.close()
    126 self.created_handles = []
    127 self.is_wrapped = False

File ~/cluster-env/trident_env/lib/python3.10/site-packages/adlfs/spec.py:1919, in AzureBlobFile.close(self)
   1917 """Close file and azure client."""
   1918 asyncio.run_coroutine_threadsafe(close_container_client(self), loop=self.loop)
-> 1919 super().close()

File ~/cluster-env/trident_env/lib/python3.10/site-packages/fsspec/spec.py:1789, in AbstractBufferedFile.close(self)
   1787 else:
   1788     if not self.forced:
-> 1789         self.flush(force=True)
   1791     if self.fs is not None:
   1792         self.fs.invalidate_cache(self.path)

File ~/cluster-env/trident_env/lib/python3.10/site-packages/fsspec/spec.py:1660, in AbstractBufferedFile.flush(self, force)
   1657         self.closed = True
   1658         raise
-> 1660 if self._upload_chunk(final=force) is not False:
   1661     self.offset += self.buffer.seek(0, 2)
   1662     self.buffer = io.BytesIO()

File ~/cluster-env/trident_env/lib/python3.10/site-packages/fsspec/asyn.py:115, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
    112 @functools.wraps(func)
    113 def wrapper(*args, **kwargs):
    114     self = obj or args[0]
--> 115     return sync(self.loop, func, *args, **kwargs)

File ~/cluster-env/trident_env/lib/python3.10/site-packages/fsspec/asyn.py:100, in sync(loop, func, timeout, *args, **kwargs)
     98     raise FSTimeoutError from return_result
     99 elif isinstance(return_result, BaseException):
--> 100     raise return_result
    101 else:
    102     return return_result

File ~/cluster-env/trident_env/lib/python3.10/site-packages/fsspec/asyn.py:55, in _runner(event, coro, result, timeout)
     53     coro = asyncio.wait_for(coro, timeout=timeout)
     54 try:
---> 55     result[0] = await coro
     56 except Exception as ex:
     57     result[0] = ex

File ~/cluster-env/trident_env/lib/python3.10/site-packages/adlfs/spec.py:2083, in AzureBlobFile._async_upload_chunk(self, final, **kwargs)
   2079                 await bc.commit_block_list(
   2080                     block_list=block_list, metadata=self.metadata
   2081                 )
   2082         else:
-> 2083             raise RuntimeError(f"Failed to upload block{e}!") from e
   2084 elif self.mode == "ab":
   2085     async with self.container_client.get_blob_client(blob=self.blob) as bc:

RuntimeError: Failed to upload blockInternal Server Error
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant