Skip to content

Commit

Permalink
SNOW-749414 adding code examples (snowflakedb#734)
Browse files Browse the repository at this point in the history
Co-authored-by: Afroz Alam <[email protected]>
  • Loading branch information
sfc-gh-mkeller and sfc-gh-aalam authored Apr 4, 2023
1 parent a376a04 commit 3bcdb5c
Show file tree
Hide file tree
Showing 2 changed files with 229 additions and 18 deletions.
237 changes: 219 additions & 18 deletions src/snowflake/snowpark/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,13 @@ def lit(literal: LiteralType) -> Column:

def sql_expr(sql: str) -> Column:
"""Creates a :class:`~snowflake.snowpark.Column` expression from raw SQL text.
Note that the function does not interpret or check the SQL text."""
Note that the function does not interpret or check the SQL text.
Example::
>>> df = session.create_dataframe([[1, 2], [3, 4]], schema=["A", "B"])
>>> df.filter("a > 1").collect() # use SQL expression
[Row(A=3, B=4)]
"""
return Column._expr(sql)


Expand Down Expand Up @@ -733,45 +739,102 @@ def mode(e: ColumnOrName) -> Column:

def skew(e: ColumnOrName) -> Column:
"""Returns the sample skewness of non-NULL records. If all records inside a group
are NULL, the function returns NULL."""
are NULL, the function returns NULL.
Example::
>>> from snowflake.snowpark.types import DecimalType
>>> df = session.create_dataframe(
... [10, 10, 20, 25, 30],
... schema=["a"]
... ).select(skew("a").cast(DecimalType(scale=4)))
>>> df.collect()
[Row(CAST (SKEW("A") AS NUMBER(38, 4))=Decimal('0.0524'))]
"""
c = _to_col_if_str(e, "skew")
return builtin("skew")(c)


def stddev(e: ColumnOrName) -> Column:
"""Returns the sample standard deviation (square root of sample variance) of
non-NULL values. If all records inside a group are NULL, returns NULL."""
non-NULL values. If all records inside a group are NULL, returns NULL.
Example::
>>> from snowflake.snowpark.types import DecimalType
>>> df = session.create_dataframe(
... [4, 9],
... schema=["N"],
... ).select(stddev(col("N")).cast(DecimalType(scale=4)))
>>> df.collect()
[Row(CAST (STDDEV("N") AS NUMBER(38, 4))=Decimal('3.5355'))]
"""
c = _to_col_if_str(e, "stddev")
return builtin("stddev")(c)


def stddev_samp(e: ColumnOrName) -> Column:
"""Returns the sample standard deviation (square root of sample variance) of
non-NULL values. If all records inside a group are NULL, returns NULL. Alias of
:func:`stddev`."""
:func:`stddev`.
Example::
>>> from snowflake.snowpark.types import DecimalType
>>> df = session.create_dataframe(
... [4, 9],
... schema=["N"],
... ).select(stddev_samp(col("N")).cast(DecimalType(scale=4)))
>>> df.collect()
[Row(CAST (STDDEV_SAMP("N") AS NUMBER(38, 4))=Decimal('3.5355'))]
"""
c = _to_col_if_str(e, "stddev_samp")
return builtin("stddev_samp")(c)


def stddev_pop(e: ColumnOrName) -> Column:
"""Returns the population standard deviation (square root of variance) of non-NULL
values. If all records inside a group are NULL, returns NULL."""
values. If all records inside a group are NULL, returns NULL.
Example::
>>> df = session.create_dataframe(
... [4, 9],
... schema=["N"],
... ).select(stddev_pop(col("N")))
>>> df.collect()
[Row(STDDEV_POP("N")=2.5)]
"""
c = _to_col_if_str(e, "stddev_pop")
return builtin("stddev_pop")(c)


def sum(e: ColumnOrName) -> Column:
"""Returns the sum of non-NULL records in a group. You can use the DISTINCT keyword
to compute the sum of unique non-null values. If all records inside a group are
NULL, the function returns NULL."""
NULL, the function returns NULL.
Example::
>>> df = session.create_dataframe(
... [4, 9],
... schema=["N"],
... ).select(sum(col("N")))
>>> df.collect()
[Row(SUM("N")=13)]
"""
c = _to_col_if_str(e, "sum")
return builtin("sum")(c)


def sum_distinct(e: ColumnOrName) -> Column:
"""Returns the sum of non-NULL distinct records in a group. You can use the
DISTINCT keyword to compute the sum of unique non-null values. If all records
inside a group are NULL, the function returns NULL."""
inside a group are NULL, the function returns NULL.
Example::
>>> df = session.create_dataframe(
... [1, 2, None, 3],
... schema=["N"],
... ).select(sum_distinct(col("N")))
>>> df.collect()
[Row(SUM( DISTINCT "N")=6)]
"""
c = _to_col_if_str(e, "sum_distinct")
return _call_function("sum", True, c)

Expand Down Expand Up @@ -1116,6 +1179,11 @@ def seq1(sign: int = 0) -> Column:
See Also:
- :meth:`Session.generator`, which can be used to generate in tandem with `seq1` to
generate sequences.
Example::
>>> df = session.generator(seq1(0), rowcount=3)
>>> df.collect()
[Row(SEQ1(0)=0), Row(SEQ1(0)=1), Row(SEQ1(0)=2)]
"""
return builtin("seq1")(Literal(sign))

Expand All @@ -1131,6 +1199,11 @@ def seq2(sign: int = 0) -> Column:
See Also:
- :meth:`Session.generator`, which can be used to generate in tandem with `seq2` to
generate sequences.
Example::
>>> df = session.generator(seq2(0), rowcount=3)
>>> df.collect()
[Row(SEQ2(0)=0), Row(SEQ2(0)=1), Row(SEQ2(0)=2)]
"""
return builtin("seq2")(Literal(sign))

Expand All @@ -1146,6 +1219,11 @@ def seq4(sign: int = 0) -> Column:
See Also:
- :meth:`Session.generator`, which can be used to generate in tandem with `seq4` to
generate sequences.
Example::
>>> df = session.generator(seq4(0), rowcount=3)
>>> df.collect()
[Row(SEQ4(0)=0), Row(SEQ4(0)=1), Row(SEQ4(0)=2)]
"""
return builtin("seq4")(Literal(sign))

Expand All @@ -1161,6 +1239,11 @@ def seq8(sign: int = 0) -> Column:
See Also:
- :meth:`Session.generator`, which can be used to generate in tandem with `seq8` to
generate sequences.
Example::
>>> df = session.generator(seq8(0), rowcount=3)
>>> df.collect()
[Row(SEQ8(0)=0), Row(SEQ8(0)=1), Row(SEQ8(0)=2)]
"""
return builtin("seq8")(Literal(sign))

Expand Down Expand Up @@ -1207,7 +1290,16 @@ def div0(


def sqrt(e: ColumnOrName) -> Column:
"""Returns the square-root of a non-negative numeric expression."""
"""Returns the square-root of a non-negative numeric expression.
Example::
>>> df = session.create_dataframe(
... [4, 9],
... schema=["N"],
... ).select(sqrt(col("N")))
>>> df.collect()
[Row(SQRT("N")=2.0), Row(SQRT("N")=3.0)]
"""
c = _to_col_if_str(e, "sqrt")
return builtin("sqrt")(c)

Expand Down Expand Up @@ -1380,25 +1472,57 @@ def floor(e: ColumnOrName) -> Column:


def sin(e: ColumnOrName) -> Column:
"""Computes the sine of its argument; the argument should be expressed in radians."""
"""Computes the sine of its argument; the argument should be expressed in radians.
Example::
>>> from snowflake.snowpark.types import DecimalType
>>> df = session.generator(seq1(0), rowcount=3).select(sin(seq1(0)).cast(DecimalType(scale=4)))
>>> df.collect()
[Row(CAST (SIN(SEQ1(0)) AS NUMBER(38, 4))=Decimal('0.0000')), Row(CAST (SIN(SEQ1(0)) AS NUMBER(38, 4))=Decimal('0.8415')), Row(CAST (SIN(SEQ1(0)) AS NUMBER(38, 4))=Decimal('0.9093'))]
"""
c = _to_col_if_str(e, "sin")
return builtin("sin")(c)


def sinh(e: ColumnOrName) -> Column:
"""Computes the hyperbolic sine of its argument."""
"""Computes the hyperbolic sine of its argument.
Example::
>>> from snowflake.snowpark.types import DecimalType
>>> df = session.generator(seq1(0), rowcount=3).select(sinh(seq1(0)).cast(DecimalType(scale=4)))
>>> df.collect()
[Row(CAST (SINH(SEQ1(0)) AS NUMBER(38, 4))=Decimal('0.0000')), Row(CAST (SINH(SEQ1(0)) AS NUMBER(38, 4))=Decimal('1.1752')), Row(CAST (SINH(SEQ1(0)) AS NUMBER(38, 4))=Decimal('3.6269'))]
"""
c = _to_col_if_str(e, "sinh")
return builtin("sinh")(c)


def tan(e: ColumnOrName) -> Column:
"""Computes the tangent of its argument; the argument should be expressed in radians."""
"""Computes the tangent of its argument; the argument should be expressed in radians.
Example::
>>> from snowflake.snowpark.types import DecimalType
>>> df = session.create_dataframe([0, 1], schema=["N"]).select(
... tan(col("N")).cast(DecimalType(scale=4))
... )
>>> df.collect()
[Row(CAST (TAN("N") AS NUMBER(38, 4))=Decimal('0.0000')), Row(CAST (TAN("N") AS NUMBER(38, 4))=Decimal('1.5574'))]
"""
c = _to_col_if_str(e, "tan")
return builtin("tan")(c)


def tanh(e: ColumnOrName) -> Column:
"""Computes the hyperbolic tangent of its argument."""
"""Computes the hyperbolic tangent of its argument.
Example::
>>> from snowflake.snowpark.types import DecimalType
>>> df = session.create_dataframe([0, 1], schema=["N"]).select(
... tanh(col("N").cast(DecimalType(scale=4)))
... )
>>> df.collect()
[Row(TANH( CAST ("N" AS NUMBER(38, 4)))=0.0), Row(TANH( CAST ("N" AS NUMBER(38, 4)))=0.7615941559557649)]
"""
c = _to_col_if_str(e, "tanh")
return builtin("tanh")(c)

Expand Down Expand Up @@ -1457,14 +1581,26 @@ def md5(e: ColumnOrName) -> Column:


def sha1(e: ColumnOrName) -> Column:
"""Returns a 40-character hex-encoded string containing the 160-bit SHA-1 message digest."""
"""Returns a 40-character hex-encoded string containing the 160-bit SHA-1 message digest.
Example::
>>> df = session.create_dataframe(["a", "b"], schema=["col"]).select(sha1("col"))
>>> df.collect()
[Row(SHA1("COL")='86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'), Row(SHA1("COL")='e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98')]
"""
c = _to_col_if_str(e, "sha1")
return builtin("sha1")(c)


def sha2(e: ColumnOrName, num_bits: int) -> Column:
"""Returns a hex-encoded string containing the N-bit SHA-2 message digest,
where N is the specified output digest size."""
where N is the specified output digest size.
Example::
>>> df = session.create_dataframe(["a", "b"], schema=["col"]).select(sha2("col", 256))
>>> df.collect()
[Row(SHA2("COL", 256)='ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb'), Row(SHA2("COL", 256)='3e23e8160039594a33894f6564e1b1348bbd7a0088d42c4acb73eeaed59c009d')]
"""
permitted_values = [0, 224, 256, 384, 512]
if num_bits not in permitted_values:
raise ValueError(
Expand Down Expand Up @@ -1696,7 +1832,13 @@ def reverse(col: ColumnOrName) -> Column:


def soundex(e: ColumnOrName) -> Column:
"""Returns a string that contains a phonetic representation of the input string."""
"""Returns a string that contains a phonetic representation of the input string.
Example::
>>> df = session.create_dataframe(["Marsha", "Marcia"], schema=["V"]).select(soundex(col("V")))
>>> df.collect()
[Row(SOUNDEX("V")='M620'), Row(SOUNDEX("V")='M620')]
"""
c = _to_col_if_str(e, "soundex")
return builtin("soundex")(c)

Expand Down Expand Up @@ -1840,7 +1982,29 @@ def split(
pattern: ColumnOrName,
) -> Column:
"""Splits a given string with a given separator and returns the result in an array
of strings. To specify a string separator, use the :func:`lit()` function."""
of strings. To specify a string separator, use the :func:`lit()` function.
Example::
>>> df = session.create_dataframe(
... [["many-many-words", "-"], ["hello--hello", "--"]],
... schema=["V", "D"],
... ).select(split(col("V"), col("D")))
>>> df.show()
-------------------------
|"SPLIT(""V"", ""D"")" |
-------------------------
|[ |
| "many", |
| "many", |
| "words" |
|] |
|[ |
| "hello", |
| "hello" |
|] |
-------------------------
<BLANKLINE>
"""
s = _to_col_if_str(str, "split")
p = _to_col_if_str(pattern, "split")
return builtin("split")(s, p)
Expand All @@ -1858,6 +2022,14 @@ def substring(
For ``pos``, 1 is the first character of the string in Snowflake database.
:func:`substr` is an alias of :func:`substring`.
Example::
>>> df = session.create_dataframe(
... ["abc", "def"],
... schema=["S"],
... ).select(substring(col("S"), 1, 1))
>>> df.collect()
[Row(SUBSTRING("S", 1, 1)='a'), Row(SUBSTRING("S", 1, 1)='d')]
"""
s = _to_col_if_str(str, "substring")
p = pos if isinstance(pos, Column) else lit(pos)
Expand Down Expand Up @@ -2129,7 +2301,16 @@ def contains(col: ColumnOrName, string: ColumnOrName) -> Column:


def startswith(col: ColumnOrName, str: ColumnOrName) -> Column:
"""Returns true if col starts with str."""
"""Returns true if col starts with str.
Example::
>>> df = session.create_dataframe(
... [["abc", "a"], ["abc", "s"]],
... schema=["S", "P"],
... ).select(startswith(col("S"), col("P")))
>>> df.collect()
[Row(STARTSWITH("S", "P")=True), Row(STARTSWITH("S", "P")=False)]
"""
c = _to_col_if_str(col, "startswith")
s = _to_col_if_str(str, "startswith")
return builtin("startswith")(c, s)
Expand Down Expand Up @@ -3569,7 +3750,18 @@ def parse_xml(e: ColumnOrName) -> Column:

def strip_null_value(col: ColumnOrName) -> Column:
"""Converts a JSON "null" value in the specified column to a SQL NULL value.
All other VARIANT values in the column are returned unchanged."""
All other VARIANT values in the column are returned unchanged.
Example::
>>> df = session.create_dataframe(
... ["null", "1"],
... schema=["S"],
... ).select(strip_null_value(parse_json(col("S"))).as_("B")).where(
... sql_expr("B is null")
... )
>>> df.collect()
[Row(B=None)]
"""
c = _to_col_if_str(col, "strip_null_value")
return builtin("strip_null_value")(c)

Expand Down Expand Up @@ -5787,6 +5979,15 @@ def sproc(
See Also:
:class:`~snowflake.snowpark.stored_procedure.StoredProcedureRegistration`
Example::
>>> from snowflake.snowpark.types import IntegerType
>>> @sproc(return_type=IntegerType(), input_types=[IntegerType(), IntegerType()], packages=["snowflake-snowpark-python"])
... def add_sp(session_, x, y):
... return session_.sql(f"SELECT {x} + {y}").collect()[0][0]
...
>>> add_sp(1, 1)
2
"""
session = session or snowflake.snowpark.session._get_active_session()
if func is None:
Expand Down
Loading

0 comments on commit 3bcdb5c

Please sign in to comment.