org.apache.spark.sql
Class SQLContext

Object
  org.apache.spark.sql.SQLContext

public class SQLContext
extends Object

The entry point for working with structured data (rows and columns) in Spark. Allows the creation of DataFrame objects as well as the execution of SQL queries.
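For orientation, a minimal Java sketch of typical usage, assuming an existing JavaSparkContext named jsc and a JSON file at a hypothetical path:

  import org.apache.spark.sql.DataFrame;
  import org.apache.spark.sql.SQLContext;

  // Wrap an existing JavaSparkContext (jsc is assumed to exist).
  SQLContext sqlContext = new SQLContext(jsc);

  // Load structured data through the DataFrameReader; the path is hypothetical.
  DataFrame people = sqlContext.read().json("/data/people.json");

  // Register the DataFrame as a temporary table and query it with SQL.
  people.registerTempTable("people");
  DataFrame adults = sqlContext.sql("SELECT name FROM people WHERE age >= 18");
  adults.show();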
Nested Class Summary

class SQLContext.implicits$
    :: Experimental :: (Scala-specific) Implicit methods available in Scala for converting common Scala objects into DataFrames.
Constructor Summary

SQLContext(JavaSparkContext sparkContext)
SQLContext(SparkContext sparkContext)
Method Summary

DataFrame  applySchema(JavaRDD<?> rdd, Class<?> beanClass)
DataFrame  applySchema(JavaRDD<Row> rowRDD, StructType schema)
DataFrame  applySchema(RDD<?> rdd, Class<?> beanClass)
DataFrame  applySchema(RDD<Row> rowRDD, StructType schema)
DataFrame  baseRelationToDataFrame(BaseRelation baseRelation)
void  cacheTable(String tableName)
    Caches the specified table in-memory.
void  clearCache()
    Removes all cached tables from the in-memory cache.
DataFrame  createDataFrame(JavaRDD<?> rdd, Class<?> beanClass)
DataFrame  createDataFrame(JavaRDD<Row> rowRDD, StructType schema)
DataFrame  createDataFrame(RDD<?> rdd, Class<?> beanClass)
<A extends scala.Product> DataFrame  createDataFrame(RDD<A> rdd, scala.reflect.api.TypeTags.TypeTag<A> evidence$3)
DataFrame  createDataFrame(RDD<Row> rowRDD, StructType schema)
<A extends scala.Product> DataFrame  createDataFrame(scala.collection.Seq<A> data, scala.reflect.api.TypeTags.TypeTag<A> evidence$4)
DataFrame  createExternalTable(String tableName, String path)
DataFrame  createExternalTable(String tableName, String source, java.util.Map<String,String> options)
DataFrame  createExternalTable(String tableName, String source, scala.collection.immutable.Map<String,String> options)
DataFrame  createExternalTable(String tableName, String path, String source)
DataFrame  createExternalTable(String tableName, String source, StructType schema, java.util.Map<String,String> options)
DataFrame  createExternalTable(String tableName, String source, StructType schema, scala.collection.immutable.Map<String,String> options)
void  dropTempTable(String tableName)
DataFrame  emptyDataFrame()
    :: Experimental :: Returns a DataFrame with no rows or columns.
ExperimentalMethods  experimental()
    :: Experimental :: A collection of methods that are considered experimental, but can be used to hook into the query planner for advanced functionality.
scala.collection.immutable.Map<String,String>  getAllConfs()
    Return all the configuration properties that have been set (i.e. not the default).
String  getConf(String key)
    Return the value of Spark SQL configuration property for the given key.
String  getConf(String key, String defaultValue)
    Return the value of Spark SQL configuration property for the given key.
static SQLContext  getOrCreate(SparkContext sparkContext)
    Get the singleton SQLContext if it exists or create a new one using the given SparkContext.
SQLContext.implicits$  implicits()
    Accessor for nested Scala object.
boolean  isCached(String tableName)
    Returns true if the table is currently cached in-memory.
DataFrame  jdbc(String url, String table)
    Deprecated. As of 1.4.0, replaced by read().jdbc().
DataFrame  jdbc(String url, String table, String[] theParts)
    Deprecated. As of 1.4.0, replaced by read().jdbc().
DataFrame  jdbc(String url, String table, String columnName, long lowerBound, long upperBound, int numPartitions)
    Deprecated. As of 1.4.0, replaced by read().jdbc().
DataFrame  jsonFile(String path)
    Deprecated. As of 1.4.0, replaced by read().json().
DataFrame  jsonFile(String path, double samplingRatio)
    Deprecated. As of 1.4.0, replaced by read().json().
DataFrame  jsonFile(String path, StructType schema)
    Deprecated. As of 1.4.0, replaced by read().json().
DataFrame  jsonRDD(JavaRDD<String> json)
    Deprecated. As of 1.4.0, replaced by read().json().
DataFrame  jsonRDD(JavaRDD<String> json, double samplingRatio)
    Deprecated. As of 1.4.0, replaced by read().json().
DataFrame  jsonRDD(JavaRDD<String> json, StructType schema)
    Deprecated. As of 1.4.0, replaced by read().json().
DataFrame  jsonRDD(RDD<String> json)
    Deprecated. As of 1.4.0, replaced by read().json().
DataFrame  jsonRDD(RDD<String> json, double samplingRatio)
    Deprecated. As of 1.4.0, replaced by read().json().
DataFrame  jsonRDD(RDD<String> json, StructType schema)
    Deprecated. As of 1.4.0, replaced by read().json().
DataFrame  load(String path)
    Deprecated. As of 1.4.0, replaced by read().load(path).
DataFrame  load(String source, java.util.Map<String,String> options)
    Deprecated. As of 1.4.0, replaced by read().format(source).options(options).load().
DataFrame  load(String source, scala.collection.immutable.Map<String,String> options)
    Deprecated. As of 1.4.0, replaced by read().format(source).options(options).load().
DataFrame  load(String path, String source)
    Deprecated. As of 1.4.0, replaced by read().format(source).load(path).
DataFrame  load(String source, StructType schema, java.util.Map<String,String> options)
    Deprecated. As of 1.4.0, replaced by read().format(source).schema(schema).options(options).load().
DataFrame  load(String source, StructType schema, scala.collection.immutable.Map<String,String> options)
    Deprecated. As of 1.4.0, replaced by read().format(source).schema(schema).options(options).load().
DataFrame  parquetFile(scala.collection.Seq<String> paths)
DataFrame  parquetFile(String... paths)
    Deprecated. As of 1.4.0, replaced by read().parquet().
DataFrame  range(long end)
DataFrame  range(long start, long end)
DataFrame  range(long start, long end, long step, int numPartitions)
DataFrameReader  read()
void  setConf(java.util.Properties props)
    Set Spark SQL configuration properties.
void  setConf(String key, String value)
    Set the given Spark SQL configuration property.
SparkContext  sparkContext()
DataFrame  sql(String sqlText)
DataFrame  table(String tableName)
String[]  tableNames()
String[]  tableNames(String databaseName)
DataFrame  tables()
DataFrame  tables(String databaseName)
UDFRegistration  udf()
    A collection of methods for registering user-defined functions (UDF).
void  uncacheTable(String tableName)
    Removes the specified table from the in-memory cache.
Methods inherited from class Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait

Methods inherited from interface org.apache.spark.Logging
initializeIfNecessary, initializeLogging, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning
Constructor Detail

public SQLContext(SparkContext sparkContext)

public SQLContext(JavaSparkContext sparkContext)
Method Detail
public static SQLContext getOrCreate(SparkContext sparkContext)
Get the singleton SQLContext if it exists or create a new one using the given SparkContext.
Parameters:
sparkContext - (undocumented)
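A brief sketch of how this is typically used, assuming an existing SparkContext named sc:

  import org.apache.spark.sql.SQLContext;

  // Returns the singleton SQLContext if it exists, otherwise creates one from sc (sc is assumed to exist).
  SQLContext sqlContext = SQLContext.getOrCreate(sc);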
public DataFrame parquetFile(String... paths)
Deprecated. As of 1.4.0, replaced by read().parquet().
Loads a Parquet file, returning the result as a DataFrame. This function returns an empty DataFrame if no paths are passed in.
Parameters:
paths - (undocumented)
public SparkContext sparkContext()
public void setConf(java.util.Properties props)
Set Spark SQL configuration properties.
Parameters:
props - (undocumented)

public void setConf(String key, String value)
Set the given Spark SQL configuration property.
Parameters:
key - (undocumented)
value - (undocumented)

public String getConf(String key)
Return the value of Spark SQL configuration property for the given key.
Parameters:
key - (undocumented)

public String getConf(String key, String defaultValue)
Return the value of Spark SQL configuration property for the given key. If the key is not set yet, return defaultValue.
Parameters:
key - (undocumented)
defaultValue - (undocumented)
public scala.collection.immutable.Map<String,String> getAllConfs()
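A small sketch of the configuration accessors; the keys shown are real Spark SQL properties, but the values are only illustrative:

  // Set a property, then read it back with and without a fallback default.
  sqlContext.setConf("spark.sql.shuffle.partitions", "50");
  String partitions = sqlContext.getConf("spark.sql.shuffle.partitions"); // "50"
  String codegen = sqlContext.getConf("spark.sql.codegen", "false");      // default used if the key is unset

  // Everything set so far, as an immutable Scala map.
  scala.collection.immutable.Map<String,String> confs = sqlContext.getAllConfs();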
public ExperimentalMethods experimental()
public DataFrame emptyDataFrame()
:: Experimental :: Returns a DataFrame with no rows or columns.
public UDFRegistration udf()
A collection of methods for registering user-defined functions (UDF).

The following example registers a Scala closure as UDF:

  sqlContext.udf.register("myUdf", (arg1: Int, arg2: String) => arg2 + arg1)

The following example registers a UDF in Java:

  sqlContext.udf().register("myUDF",
      new UDF2<Integer, String, String>() {
          @Override
          public String call(Integer arg1, String arg2) {
              return arg2 + arg1;
          }
      }, DataTypes.StringType);

Or, to use Java 8 lambda syntax:

  sqlContext.udf().register("myUDF",
      (Integer arg1, String arg2) -> arg2 + arg1,
      DataTypes.StringType);
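Once registered, the UDF can be invoked by name from SQL. A sketch, assuming a temporary table named people with columns age and name (both hypothetical):

  // Calls the myUDF function registered above.
  DataFrame result = sqlContext.sql("SELECT myUDF(age, name) FROM people");
  result.show();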
public boolean isCached(String tableName)
Returns true if the table is currently cached in-memory.
Parameters:
tableName - (undocumented)

public void cacheTable(String tableName)
Caches the specified table in-memory.
Parameters:
tableName - (undocumented)

public void uncacheTable(String tableName)
Removes the specified table from the in-memory cache.
Parameters:
tableName - (undocumented)

public void clearCache()
Removes all cached tables from the in-memory cache.
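A short sketch of the caching workflow, assuming a table named logs has already been registered (hypothetical):

  // Pin the table's data in memory, check the flag, then release it.
  sqlContext.cacheTable("logs");
  boolean cached = sqlContext.isCached("logs"); // true after cacheTable
  sqlContext.uncacheTable("logs");
  sqlContext.clearCache();                      // removes anything still cached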
public SQLContext.implicits$ implicits()
public <A extends scala.Product> DataFrame createDataFrame(RDD<A> rdd, scala.reflect.api.TypeTags.TypeTag<A> evidence$3)
public <A extends scala.Product> DataFrame createDataFrame(scala.collection.Seq<A> data, scala.reflect.api.TypeTags.TypeTag<A> evidence$4)
public DataFrame baseRelationToDataFrame(BaseRelation baseRelation)
public DataFrame createDataFrame(RDD<Row> rowRDD, StructType schema)
public DataFrame createDataFrame(JavaRDD<Row> rowRDD, StructType schema)
public DataFrame createDataFrame(RDD<?> rdd, Class<?> beanClass)
public DataFrame createDataFrame(JavaRDD<?> rdd, Class<?> beanClass)
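A sketch of createDataFrame(JavaRDD<Row>, StructType), building a DataFrame from hand-made rows plus an explicit schema; jsc and sqlContext are assumed to exist, and the column names and values are made up:

  import java.util.Arrays;
  import org.apache.spark.api.java.JavaRDD;
  import org.apache.spark.sql.DataFrame;
  import org.apache.spark.sql.Row;
  import org.apache.spark.sql.RowFactory;
  import org.apache.spark.sql.types.DataTypes;
  import org.apache.spark.sql.types.StructType;

  // A few rows built by hand.
  JavaRDD<Row> rows = jsc.parallelize(Arrays.asList(
      RowFactory.create("Alice", 29),
      RowFactory.create("Bob", 31)));

  // Describe the columns explicitly.
  StructType schema = DataTypes.createStructType(Arrays.asList(
      DataTypes.createStructField("name", DataTypes.StringType, false),
      DataTypes.createStructField("age", DataTypes.IntegerType, false)));

  DataFrame people = sqlContext.createDataFrame(rows, schema);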
public DataFrameReader read()
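read() returns the DataFrameReader that supersedes the deprecated load/json/parquet/jdbc helpers below. A sketch with a hypothetical path:

  // Generic form: pick a format, then load.
  DataFrame users = sqlContext.read()
      .format("parquet")
      .load("/data/users.parquet"); // hypothetical location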
public DataFrame createExternalTable(String tableName, String path)
public DataFrame createExternalTable(String tableName, String path, String source)
public DataFrame createExternalTable(String tableName, String source, java.util.Map<String,String> options)
public DataFrame createExternalTable(String tableName, String source, scala.collection.immutable.Map<String,String> options)
public DataFrame createExternalTable(String tableName, String source, StructType schema, java.util.Map<String,String> options)
public DataFrame createExternalTable(String tableName, String source, StructType schema, scala.collection.immutable.Map<String,String> options)
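A sketch of registering existing data as an external table in the catalog; the table name, source, and path option are hypothetical:

  import java.util.HashMap;
  import java.util.Map;

  // Point a catalog entry at existing Parquet data without copying it.
  Map<String,String> options = new HashMap<String,String>();
  options.put("path", "/data/events"); // hypothetical location
  DataFrame events = sqlContext.createExternalTable("events", "parquet", options);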
public void dropTempTable(String tableName)
public DataFrame range(long start, long end)
public DataFrame range(long end)
public DataFrame range(long start, long end, long step, int numPartitions)
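range() is convenient for generating numeric test data as a single-column DataFrame of longs. A quick sketch:

  // 0, 2, 4, ..., 98 spread over 4 partitions.
  DataFrame numbers = sqlContext.range(0, 100, 2, 4);
  long count = numbers.count(); // 50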
public DataFrame sql(String sqlText)
public DataFrame table(String tableName)
public DataFrame tables()
public DataFrame tables(String databaseName)
public String[] tableNames()
public String[] tableNames(String databaseName)
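A sketch tying the catalog accessors together, assuming a DataFrame has already been registered as a temporary table named people (hypothetical):

  DataFrame teenagers = sqlContext.sql("SELECT name FROM people WHERE age BETWEEN 13 AND 19");
  DataFrame sameTable = sqlContext.table("people");
  String[] names = sqlContext.tableNames(); // includes "people"
  DataFrame catalog = sqlContext.tables();  // one row per table in the current database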
public DataFrame applySchema(RDD<Row> rowRDD, StructType schema)
public DataFrame applySchema(JavaRDD<Row> rowRDD, StructType schema)
public DataFrame applySchema(RDD<?> rdd, Class<?> beanClass)
public DataFrame applySchema(JavaRDD<?> rdd, Class<?> beanClass)
public DataFrame parquetFile(scala.collection.Seq<String> paths)
public DataFrame jsonFile(String path)
Deprecated. As of 1.4.0, replaced by read().json().
Loads a JSON file (one object per line), returning the result as a DataFrame. It goes through the entire dataset once to determine the schema.
Parameters:
path - (undocumented)

public DataFrame jsonFile(String path, StructType schema)
Deprecated. As of 1.4.0, replaced by read().json().
Loads a JSON file (one object per line) and applies the given schema, returning the result as a DataFrame.
Parameters:
path - (undocumented)
schema - (undocumented)

public DataFrame jsonFile(String path, double samplingRatio)
Deprecated. As of 1.4.0, replaced by read().json().
Parameters:
path - (undocumented)
samplingRatio - (undocumented)
public DataFrame jsonRDD(RDD<String> json)
Deprecated. As of 1.4.0, replaced by read().json().
Loads an RDD[String] storing JSON objects (one object per record), returning the result as a DataFrame. It goes through the entire dataset once to determine the schema.
Parameters:
json - (undocumented)

public DataFrame jsonRDD(JavaRDD<String> json)
Deprecated. As of 1.4.0, replaced by read().json().
Loads a JavaRDD[String] storing JSON objects (one object per record), returning the result as a DataFrame. It goes through the entire dataset once to determine the schema.
Parameters:
json - (undocumented)

public DataFrame jsonRDD(RDD<String> json, StructType schema)
Deprecated. As of 1.4.0, replaced by read().json().
Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, returning the result as a DataFrame.
Parameters:
json - (undocumented)
schema - (undocumented)

public DataFrame jsonRDD(JavaRDD<String> json, StructType schema)
Deprecated. As of 1.4.0, replaced by read().json().
Loads a JavaRDD[String] storing JSON objects (one object per record) and applies the given schema, returning the result as a DataFrame.
Parameters:
json - (undocumented)
schema - (undocumented)

public DataFrame jsonRDD(RDD<String> json, double samplingRatio)
Deprecated. As of 1.4.0, replaced by read().json().
Loads an RDD[String] storing JSON objects (one object per record), returning the result as a DataFrame.
Parameters:
json - (undocumented)
samplingRatio - (undocumented)

public DataFrame jsonRDD(JavaRDD<String> json, double samplingRatio)
Deprecated. As of 1.4.0, replaced by read().json().
Loads a JavaRDD[String] storing JSON objects (one object per record), returning the result as a DataFrame.
Parameters:
json - (undocumented)
samplingRatio - (undocumented)
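The non-deprecated equivalents of jsonFile and jsonRDD go through the DataFrameReader. A sketch, with a hypothetical path and an assumed JavaRDD<String> of JSON documents named jsonLines:

  // From a file path (hypothetical).
  DataFrame fromFile = sqlContext.read().json("/data/people.json");

  // From an in-memory RDD of JSON strings (jsonLines is assumed to exist).
  DataFrame fromRdd = sqlContext.read().json(jsonLines);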
public DataFrame load(String path)
Deprecated. As of 1.4.0, replaced by read().load(path).
Parameters:
path - (undocumented)

public DataFrame load(String path, String source)
Deprecated. As of 1.4.0, replaced by read().format(source).load(path).
Parameters:
path - (undocumented)
source - (undocumented)

public DataFrame load(String source, java.util.Map<String,String> options)
Deprecated. As of 1.4.0, replaced by read().format(source).options(options).load().
Parameters:
source - (undocumented)
options - (undocumented)

public DataFrame load(String source, scala.collection.immutable.Map<String,String> options)
Deprecated. As of 1.4.0, replaced by read().format(source).options(options).load().
Parameters:
source - (undocumented)
options - (undocumented)

public DataFrame load(String source, StructType schema, java.util.Map<String,String> options)
Deprecated. As of 1.4.0, replaced by read().format(source).schema(schema).options(options).load().
Parameters:
source - (undocumented)
schema - (undocumented)
options - (undocumented)

public DataFrame load(String source, StructType schema, scala.collection.immutable.Map<String,String> options)
Deprecated. As of 1.4.0, replaced by read().format(source).schema(schema).options(options).load().
Parameters:
source - (undocumented)
schema - (undocumented)
options - (undocumented)
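The corresponding DataFrameReader form of the deprecated load(source, options) overloads; the source name and path option are only illustrative:

  import java.util.HashMap;
  import java.util.Map;

  Map<String,String> options = new HashMap<String,String>();
  options.put("path", "/data/events.json"); // hypothetical option understood by the json source

  DataFrame df = sqlContext.read()
      .format("json")
      .options(options)
      .load();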
public DataFrame jdbc(String url, String table)
Deprecated. As of 1.4.0, replaced by read().jdbc().
Construct a DataFrame representing the database table accessible via JDBC URL url named table.
Parameters:
url - (undocumented)
table - (undocumented)

public DataFrame jdbc(String url, String table, String columnName, long lowerBound, long upperBound, int numPartitions)
Deprecated. As of 1.4.0, replaced by read().jdbc().
Construct a DataFrame representing the database table accessible via JDBC URL url named table. Partitions of the table will be retrieved in parallel based on the parameters passed to this function.
Parameters:
columnName - the name of a column of integral type that will be used for partitioning.
lowerBound - the minimum value of columnName used to decide partition stride
upperBound - the maximum value of columnName used to decide partition stride
numPartitions - the number of partitions. The range minValue-maxValue will be split evenly into this many partitions.
url - (undocumented)
table - (undocumented)

public DataFrame jdbc(String url, String table, String[] theParts)
Deprecated. As of 1.4.0, replaced by read().jdbc().
Construct a DataFrame representing the database table accessible via JDBC URL url named table. The theParts parameter gives a list of expressions suitable for inclusion in WHERE clauses; each one defines one partition of the DataFrame.
Parameters:
url - (undocumented)
table - (undocumented)
theParts - (undocumented)
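The non-deprecated path is DataFrameReader.jdbc. A sketch with a hypothetical connection URL, table, and credentials:

  import java.util.Properties;

  Properties connectionProperties = new Properties();
  connectionProperties.setProperty("user", "reporting"); // hypothetical credentials
  connectionProperties.setProperty("password", "secret");

  // Equivalent of the deprecated jdbc(url, table).
  DataFrame orders = sqlContext.read().jdbc(
      "jdbc:postgresql://db.example.com:5432/sales",     // hypothetical URL
      "orders",
      connectionProperties);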