Geospatial processing example using Sedona

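Dependencies: a JDK and the Ammonite script runner (`amm`, used in the run command at the end). Spark 3.5.0 and Sedona 1.5.0 are fetched automatically by the `$ivy` imports at the top of the script.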

Generate a GeoJSON file with Overpass Turbo (https://overpass-turbo.eu), using the following query:


/*
This has been generated by the overpass-turbo wizard.
The original search was:
"(route=hiking or highway=track or highway=path) in Bardonecchia"
*/
// request JSON output and set the query timeout to 25
[out:json][timeout:25];
// fetch area "Bardonecchia" to search in
{{geocodeArea:Bardonecchia}}->.searchArea;
// gather results: union of the three tag filters below, each restricted to the search area
(
  nwr["route"="hiking"](area.searchArea);
  nwr["highway"="track"](area.searchArea);
  nwr["highway"="path"](area.searchArea);
);
// print results, including their geometry
out geom;

Export the result as GeoJSON, save it in the working directory as `Bardonecchia-hiking.geojson` (the file name the script below reads), and load it in a Spark session:

// geo-test.sc
import $ivy.`org.apache.spark::spark-core:3.5.0`
import $ivy.`org.apache.spark::spark-sql:3.5.0`
import $ivy.`org.apache.sedona::sedona-spark-shaded-3.4:1.5.0`

import org.apache.spark.sql.SparkSession
import org.apache.sedona.spark.SedonaContext
import org.apache.spark.sql.functions._

/** Script entry point.
  *
  * Starts a local Spark session, enables Sedona on it, loads
  * `Bardonecchia-hiking.geojson` from the working directory, converts each
  * feature's geometry into a Sedona geometry column, registers the result as
  * the temp view `rawdf`, and prints its schema.
  */
@main
def main() : Unit = {
  val spark = SparkSession
    .builder()
    .appName("Spark SQL basic example")
    .master("local[*]")
    .getOrCreate()

  try {
    val sedona = SedonaContext.create(spark)

    // Explicit schema instead of inference: declaring `geometry` as a string
    // makes the JSON reader keep each geometry object as raw GeoJSON text,
    // which is the input ST_GeomFromGeoJSON expects. With an inferred schema
    // the geometry would be parsed into a struct, and mixed geometry types
    // (points, lines, polygons) would not share one consistent coordinates type.
    // NOTE(review): `id` and `properties` follow the usual GeoJSON Feature
    // layout; confirm against the exported file.
    val geoJsonSchema =
      "type string, " +
        "features array<struct<" +
        "type string, id string, geometry string, properties map<string, string>>>"

    val rawDf = sedona
      .read
      .schema(geoJsonSchema)
      // The export is a single JSON document spanning many lines.
      .option("multiline", "true")
      .json("Bardonecchia-hiking.geojson")
      // One row per GeoJSON feature, with the feature's fields as columns.
      .selectExpr("explode(features) as features")
      .select("features.*")
      .withColumn("geometry", expr("ST_GeomFromGeoJSON(geometry)"))

    rawDf.createOrReplaceTempView("rawdf")
    rawDf.printSchema()
  } finally {
    // Release the local Spark resources even if loading fails.
    spark.stop()
  }
}

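Run the script with Ammonite using the following command (the `--add-exports` option exposes the JDK-internal `sun.nio.ch` package, which Spark needs on recent Java versions):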

JAVA_OPTS='--add-exports java.base/sun.nio.ch=ALL-UNNAMED' amm geo-test.sc