Storing & grouping by vessel trips • mfdb

Samples can be assigned to a trip, which provides a way of including socioeconomic data into MFDB.

The following examples use the following table_string helper to succintly define tables:

# Convert a string into a data.frame
table_string <- function (text, ...) read.table(
    text = text,
    blank.lines.skip = TRUE,
    header = TRUE,
    stringsAsFactors = FALSE,
    ...)

Then connect to a database and set up some areas/divisions:

mdb <- mfdb(tempfile(fileext = '.duckdb'))
mfdb_import_area(mdb, table_string('
name  division size
45G01     divA   10
45G02     divA  200
45G03     divB  400
'))

Importing data

Trips can have a start_port and end_port associated with them, before we can do this we need to define any ports we use. We do this with mfdb_import_port_taxonomy:

mfdb_import_port_taxonomy(mdb, table_string('
name    description     institute   latitude longitude
ISHFF   Hafnarfjordur   ISL         64.04   21.57
NOHVJ   Hvalfjordur     ISL         64.21   21.45
ISKEF   Keflavik    ISL         64.00   22.33
ISREK   Reykjavik   ISL         64.09   21.55
ISSEJ   Seydisfjordur   ISL         65.15   13.55
ISKJF   Skerjafjordur   ISL         64.07   21.54
'))

We can now define trips that use these ports:

mfdb_import_trip_taxonomy(mdb, table_string('
name    start_date  end_date    crew    oil_consumption start_port  end_port
T1  2019-01-21  2019-02-11  4   3000        ISKEF       ISREK
T2  2019-01-24  2019-02-14  5   9000        ISKEF       ISSEJ
T3  2019-04-21  2019-05-11  4   3000        ISREK       ISKEF
T4  2019-04-24  2019-05-14  5   9000        ISSEJ       ISKEF
'))

There is no formal link between trips and tows, so you can link a sample to a tow without recording trips. But we can create tows and vessels to use:

mfdb_import_tow_taxonomy(mdb, table_string('
name latitude longitude  depth length
 T1a     64.1    -23.15  98.82     10
 T1b     64.1    -23.15  98.82     10
 T2a     64.1    -23.15  98.82     10
 T2b     64.1    -23.15  98.82     10
 T3a     64.1    -23.15  98.82     10
 T3b     64.1    -23.15  98.82     10
 T4a     64.1    -23.15  98.82     10
 T4b     64.1    -23.15  98.82     10
'))

mfdb_import_vessel_taxonomy(mdb, table_string('
name full_name
V1  "Vessel 1"
V2  "Vessel 2"
'))

And finally import data that is associated to a vessel/trip/tow:

mfdb_import_survey(mdb, data_source = "cod2000",
table_string("
year    month   areacell   species vessel trip tow  length  count
2019    1       45G01      COD     V1     T1   T1a    10      358
2019    1       45G01      COD     V1     T1   T1a    20      320
2019    1       45G01      COD     V1     T1   T1a    30      162
2019    1       45G02      COD     V1     T1   T1b    10      240
2019    1       45G02      COD     V1     T1   T1b    20      278
2019    1       45G02      COD     V1     T1   T1b    30      122

2019    2       45G01      COD     V2     T2   T2a    10      255
2019    2       45G01      COD     V2     T2   T2a    20      138
2019    2       45G01      COD     V2     T2   T2a    30      168
2019    2       45G02      COD     V2     T2   T2b    10      349
2019    2       45G02      COD     V2     T2   T2b    20      106
2019    2       45G02      COD     V2     T2   T2b    30      262

2019    4       45G01      COD     V1     T3   T3a    10      395
2019    4       45G01      COD     V1     T3   T3a    20      214
2019    4       45G01      COD     V1     T3   T3a    30      195
2019    4       45G02      COD     V1     T3   T3b    10      396
2019    4       45G02      COD     V1     T3   T3b    20      325
2019    4       45G02      COD     V1     T3   T3b    30      269

2019    5       45G01      COD     V2     T4   T4a    10      239
2019    5       45G01      COD     V2     T4   T4a    20      309
2019    5       45G01      COD     V2     T4   T4a    30      162
2019    5       45G02      COD     V2     T4   T4b    10      342
2019    5       45G02      COD     V2     T4   T4b    20      175
2019    5       45G02      COD     V2     T4   T4b    30      299
"))

Querying data

We can now use the mfdb_sample_* functions to select this data back out again.

We can group and filter by any of the trip attributes. For instance, to group/filter by trip_start_date:

agg_data <- mfdb_sample_count(mdb, c('trip_start_date', 'length'), list(
    trip_start_date = '2019-01-21',
    length = mfdb_unaggregated()))
agg_data

## $`0.0.0.0.0`
##   year step area trip_start_date length number
## 1  all  all  all      2019-01-21     10    598
## 2  all  all  all      2019-01-21     20    598
## 3  all  all  all      2019-01-21     30    284

Note that we’ve summed T1a and T1b, since their start date matches.

We can use mfdb_group() to aggregate several ports, for instance:

agg_data <- mfdb_sample_count(mdb, c('trip_start_port', 'length'), list(
    trip_start_port = mfdb_group(KEF = 'ISKEF', other = c('ISREK', 'ISSEJ')),
    length = mfdb_unaggregated()))
agg_data

## $`0.0.0.0.0`
##   year step area trip_start_port length number
## 1  all  all  all             KEF     10   1202
## 2  all  all  all             KEF     20    842
## 3  all  all  all             KEF     30    714
## 4  all  all  all           other     10   1372
## 5  all  all  all           other     20   1023
## 6  all  all  all           other     30    925

We can also use mfdb_dplyr_sample to re-extract data, joining with port metadata:

# TODO: Use native pipe syntax once available
x <- mfdb_dplyr_sample(mdb, c('trip_start_port', 'trip_end_port', 'trip_start_port_latitude'))
x <- dplyr::select(x, 'year', 'month', 'trip_start_port', 'trip_end_port', 'trip_start_port_latitude', 'count')
as.data.frame(x)

##    year month trip_start_port trip_end_port trip_start_port_latitude count
## 1  2019     1           ISKEF         ISREK                    64.00   358
## 2  2019     1           ISKEF         ISREK                    64.00   320
## 3  2019     1           ISKEF         ISREK                    64.00   162
## 4  2019     1           ISKEF         ISREK                    64.00   240
## 5  2019     1           ISKEF         ISREK                    64.00   278
## 6  2019     1           ISKEF         ISREK                    64.00   122
## 7  2019     2           ISKEF         ISSEJ                    64.00   255
## 8  2019     2           ISKEF         ISSEJ                    64.00   138
## 9  2019     2           ISKEF         ISSEJ                    64.00   168
## 10 2019     2           ISKEF         ISSEJ                    64.00   349
## 11 2019     2           ISKEF         ISSEJ                    64.00   106
## 12 2019     2           ISKEF         ISSEJ                    64.00   262
## 13 2019     4           ISREK         ISKEF                    64.09   395
## 14 2019     4           ISREK         ISKEF                    64.09   214
## 15 2019     4           ISREK         ISKEF                    64.09   195
## 16 2019     4           ISREK         ISKEF                    64.09   396
## 17 2019     4           ISREK         ISKEF                    64.09   325
## 18 2019     4           ISREK         ISKEF                    64.09   269
## 19 2019     5           ISSEJ         ISKEF                    65.15   239
## 20 2019     5           ISSEJ         ISKEF                    65.15   309
## 21 2019     5           ISSEJ         ISKEF                    65.15   162
## 22 2019     5           ISSEJ         ISKEF                    65.15   342
## 23 2019     5           ISSEJ         ISKEF                    65.15   175
## 24 2019     5           ISSEJ         ISKEF                    65.15   299

mfdb_disconnect(mdb)
options(unittest.output = NULL)