Skip to content

Commit

Permalink
Partial changes for VCF Character S->U
Browse files (browse the repository at this point in the history)
  • Loading branch information
jeromekelleher committed Mar 5, 2024
1 parent 6b10a77 commit d47b30e
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
4 changes: 2 additions & 2 deletions sgkit/io/vcf/vcf_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def _vcf_type_to_numpy(
elif vcf_type == "Float":
return "f4", FLOAT32_MISSING, FLOAT32_FILL
elif vcf_type == "Character":
return "S1", CHAR_MISSING, CHAR_FILL
return "U1", CHAR_MISSING, CHAR_FILL
elif vcf_type == "String":
return "O", STR_MISSING, STR_FILL
raise ValueError(
Expand All @@ -188,7 +188,7 @@ def _vcf_type_to_numpy(

def _is_str_or_char(array: ArrayLike) -> bool:
"""Return True if the array is of string or character type"""
return array.dtype.kind in ("O", "S", "U")
return array.dtype.kind in ("O", "U")


class VcfFieldHandler:
Expand Down
8 changes: 5 additions & 3 deletions sgkit/io/vcf/vcf_writer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
FLOAT32_MISSING_AS_INT32,
INT_FILL,
INT_MISSING,
STR_MISSING,
)

COLON = ord(":")
Expand Down Expand Up @@ -316,7 +317,8 @@ def vcf_values_to_byte_buf_size(a):
elif a.dtype == np.float32:
# values + separators
return a.size * FLOAT32_BUF_SIZE + a.size
elif a.dtype.kind == "S":
elif a.dtype.kind == "U":
# NOTE: NumPy "U" itemsize is 4 bytes per character (UCS-4), so this is an upper bound on the UTF-8 encoded size.
# values + separators
return a.size * a.dtype.itemsize + a.size
else:
Expand Down Expand Up @@ -502,8 +504,8 @@ def create_mask(arr):
return np.all(arr == INT_MISSING, axis=axis)
elif arr.dtype == np.float32:
return np.all(arr.view("i4") == FLOAT32_MISSING_AS_INT32, axis=axis)
elif arr.dtype.kind == "S":
return np.all(arr == STR_MISSING_BYTE, axis=axis)
elif arr.dtype.kind == "U":
return np.all(arr == STR_MISSING, axis=axis)
else:
raise ValueError(f"Unsupported dtype: {arr.dtype}")

Expand Down

0 comments on commit d47b30e

Please sign in to comment.