/*
 * nasd_layout.c
 *
 * Layout control module for NASD embedded filesystem.
 *
 * Author: Jim Zelenka
 */
/*
 * Copyright (c) of Carnegie Mellon University, 1998,1999.
 *
 * Permission to reproduce, use, and prepare derivative works of
 * this software for internal use is granted provided the copyright
 * and "No Warranty" statements are included with all reproductions
 * and derivative works. This software may also be redistributed
 * without charge provided that the copyright and "No Warranty"
 * statements are included in all redistributions.
 *
 * NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS.
 * CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER
 * EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED
 * TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY
 * OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. CARNEGIE
 * MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT
 * TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
 */


#include <nasd/nasd_options.h>
#include <nasd/nasd_drive_options.h>
#include <nasd/nasd_types.h>
#include <nasd/nasd_freelist.h>
#include <nasd/nasd_itypes.h>
#include <nasd/nasd_mem.h>
#include <nasd/nasd_cache.h>
#include <nasd/nasd_common.h>
#include <nasd/nasd_timer.h>
#include <nasd/nasd_security.h>
#include <nasd/nasd_sys.h>
#include <nasd/nasd_layout.h>

/*
 * Table of the layout policies compiled into this drive.
 *
 * nasd_od_layout_init() scans this table for the entry whose nl_type
 * matches config->layout_type and makes it the current layout
 * (nasd_nl_cur). Which entries exist is controlled by the
 * NASD_DRIVE_LAYOUT_SEQUENTIAL_INCLUDE and
 * NASD_DRIVE_LAYOUT_REGION_INCLUDE build options.
 *
 * Initializers are positional. Presumably the order is: type
 * character, printable name, then the init, format, init_dynamic,
 * get_node_block, node_fail_create, release_oneblock, release_blocks,
 * adj_prealloc, surrender_prealloc, node_deleting and alloc_blocks
 * entry points -- TODO confirm against the declaration of
 * nasd_layout_switch_t.
 */
nasd_layout_switch_t
nasd_layout_switch[] = {
#if NASD_DRIVE_LAYOUT_SEQUENTIAL_INCLUDE > 0
  /* 's' and 'S' share every entry point except the init routine */
  {
  's',
  "Sequential-clustered",
  nasd_nl_seq_cl_init,
  nasd_nl_seq_format,
  nasd_nl_seq_init_dynamic,
  nasd_nl_seq_get_node_block,
  nasd_nl_seq_node_fail_create,
  nasd_nl_seq_release_oneblock,
  nasd_nl_seq_release_blocks,
  nasd_nl_seq_adj_prealloc,
  nasd_nl_seq_surrender_prealloc,
  nasd_nl_seq_node_deleting,
  nasd_nl_seq_alloc_blocks},

  {
  'S',
  "Sequential",
  nasd_nl_seq_init,
  nasd_nl_seq_format,
  nasd_nl_seq_init_dynamic,
  nasd_nl_seq_get_node_block,
  nasd_nl_seq_node_fail_create,
  nasd_nl_seq_release_oneblock,
  nasd_nl_seq_release_blocks,
  nasd_nl_seq_adj_prealloc,
  nasd_nl_seq_surrender_prealloc,
  nasd_nl_seq_node_deleting,
  nasd_nl_seq_alloc_blocks},
#endif /* NASD_DRIVE_LAYOUT_SEQUENTIAL_INCLUDE > 0 */

#if NASD_DRIVE_LAYOUT_REGION_INCLUDE > 0
  /* 'r' and 'R' share every entry point except the init routine */
  {
  'r',
  "Region",
  nasd_nl_reg_init,
  nasd_nl_reg_format,
  nasd_nl_reg_init_dynamic,
  nasd_nl_reg_get_node_block,
  nasd_nl_reg_node_fail_create,
  nasd_nl_reg_release_oneblock,
  nasd_nl_reg_release_blocks,
  nasd_nl_reg_adj_prealloc,
  nasd_nl_reg_surrender_prealloc,
  nasd_nl_reg_node_deleting,
  nasd_nl_reg_alloc_blocks},

  {
  'R',
  "Region-no-clustering",
  nasd_nl_reg_nocl_init,
  nasd_nl_reg_format,
  nasd_nl_reg_init_dynamic,
  nasd_nl_reg_get_node_block,
  nasd_nl_reg_node_fail_create,
  nasd_nl_reg_release_oneblock,
  nasd_nl_reg_release_blocks,
  nasd_nl_reg_adj_prealloc,
  nasd_nl_reg_surrender_prealloc,
  nasd_nl_reg_node_deleting,
  nasd_nl_reg_alloc_blocks},
#endif /* NASD_DRIVE_LAYOUT_REGION_INCLUDE > 0 */
};

/* Number of entries in nasd_layout_switch[]. */
int nasd_od_nlayouts =
  (sizeof(nasd_layout_switch) / sizeof(nasd_layout_switch[0]));

/*
 * Layout in use by the drive. NULL until nasd_od_layout_init()
 * finds an entry matching the configured layout type.
 */
nasd_layout_switch_t *nasd_nl_cur = NULL;

/*
 * Layout statistics. Zeroed by nasd_od_layout_init(); the per-operation
 * *_time fields are accumulated by LAYOUT_TIMING_DONE().
 */
nasd_ctrl_layout_stat_t nasd_drive_layout_stats;

/*
 * Declares the locals "ts" and "tm" used for per-operation timing and
 * invokes NASD_TM_START on tm.
 *
 * Because the expansion ends with a statement, this must be the last
 * item in a function's declaration list. It is written without a
 * trailing semicolon at the point of use. Pair it with
 * LAYOUT_TIMING_DONE() on every return path.
 */
#define LAYOUT_TIMING_DECL_GO \
  nasd_timespec_t ts; \
  nasd_timer_t tm; \
  \
  NASD_TM_START(&tm);

/*
 * Finish timing an operation begun with LAYOUT_TIMING_DECL_GO.
 *
 * Invokes NASD_TM_STOP on tm, NASD_TM_ELAPSED_TS to fill ts from tm,
 * and NASD_ATOMIC_TIMESPEC_ADD to add ts into
 * nasd_drive_layout_stats.<_func_>_time.
 *
 * _func_ is the bare operation name (e.g. get_node_block); only the
 * "_time" suffix is token-pasted onto it. The member-access "." is
 * left as its own token, since pasting "." onto an identifier does
 * not form a valid preprocessing token.
 *
 * Wrapped in do/while(0) so the expansion is a single statement; use
 * as "LAYOUT_TIMING_DONE(name);".
 */
#define LAYOUT_TIMING_DONE(_func_) \
  do { \
    NASD_TM_STOP(&tm); \
    NASD_TM_ELAPSED_TS(&tm, &ts); \
    NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_layout_stats._func_##_time, &ts); \
  } while (0)

/*
 * Select and initialize the layout policy named by config->layout_type.
 *
 * Resets nasd_drive_layout_stats, looks the layout up in
 * nasd_layout_switch[], runs its nl_init routine, and records the
 * layout's name in the stats.
 *
 * Returns NASD_BAD_LAYOUT_TYPE if no compiled-in layout matches, the
 * nl_init status if that routine fails (nasd_nl_cur stays pointed at
 * the selected entry in that case), and NASD_SUCCESS otherwise.
 */
nasd_status_t
nasd_od_layout_init(
  nasd_od_config_t  *config)
{
  nasd_layout_switch_t *candidate;
  nasd_status_t status;
  int idx;

  bzero((char *)&nasd_drive_layout_stats, sizeof(nasd_drive_layout_stats));
  nasd_drive_layout_stats.ctrl_id = NASD_CTRL_DRIVE_INFO;

  /* first table entry with a matching type character wins */
  nasd_nl_cur = NULL;
  for (idx = 0; idx < nasd_od_nlayouts; idx++) {
    candidate = &nasd_layout_switch[idx];
    if (candidate->nl_type != config->layout_type)
      continue;
    nasd_nl_cur = candidate;
    break;
  }

  if (nasd_nl_cur == NULL) {
    return(NASD_BAD_LAYOUT_TYPE);
  }

  status = nasd_nl_cur->nl_init(config);
  if (status) {
    return(status);
  }

  nasd_printf("DRIVE: using layout type %s\n", nasd_nl_cur->nl_name);

  /*
   * layout_name was zeroed by the bzero() above, so whatever follows
   * the copied bytes is already zero.
   * NOTE(review): if a name is NASD_CTRL_LAYOUT_NAME_LEN bytes or
   * longer, this copies NASD_CTRL_LAYOUT_NAME_LEN bytes and may leave
   * no terminator -- confirm the declared size of layout_name.
   */
  bcopy((char *)nasd_nl_cur->nl_name, nasd_drive_layout_stats.layout_name,
    NASD_MIN(strlen(nasd_nl_cur->nl_name), NASD_CTRL_LAYOUT_NAME_LEN));

  return(NASD_SUCCESS);
}

/*
 * Caller holds partition write lock.
 *
 * Obtain one block for a new node, plus prealloc_blocks of
 * preallocation, from the current layout.
 *
 * Fails with NASD_NO_SPACE, without consulting the layout, when the
 * partition has fewer than prealloc_blocks + 1 unallocated blocks.
 * On success the partition counters are charged (one block used,
 * prealloc_blocks + 1 allocated), and *exle_p / *pre_exle_p receive
 * the node-block extent and the preallocation extent (which may be
 * NULL). On failure the outputs are not written.
 */
nasd_status_t
nasd_od_layout_get_node_block(
  int                      partnum,
  nasd_blkcnt_t            prealloc_blocks,
  nasd_layout_hint_t      *layout_hint,
  nasd_odc_exlist_ent_t  **exle_p,
  nasd_odc_exlist_ent_t  **pre_exle_p)
{
  nasd_odc_exlist_ent_t *node_ent, *prealloc_ent;
  nasd_blkcnt_t total_blocks;
  nasd_odc_icpart_t *icp;
  nasd_od_part_t *part;
  nasd_status_t status;
  LAYOUT_TIMING_DECL_GO

  part = &PART(partnum);
  /* NOTE(review): icp is never read in the visible code */
  icp = &nasd_odc_state->parts[partnum];

  /* the node block itself plus its preallocation */
  total_blocks = prealloc_blocks + 1;
  if (total_blocks > (part->part_size - part->blocks_allocated)) {
    status = NASD_NO_SPACE;
    goto done;
  }

  status = nasd_nl_cur->nl_get_node_block(partnum, prealloc_blocks,
    layout_hint, &node_ent, &prealloc_ent);
  if (status)
    goto done;

  part->blocks_used++;
  part->blocks_allocated += total_blocks;

  nasd_part_modified(partnum);

  /* the node extent must be exactly one block */
  NASD_ASSERT(node_ent->range.first == node_ent->range.last);
  if (prealloc_ent) {
    /* the preallocation must come back as a lone list entry */
    NASD_ASSERT(prealloc_ent->next == NULL);
    NASD_ASSERT(prealloc_ent->prev == NULL);
  }

  *exle_p = node_ent;
  *pre_exle_p = prealloc_ent;

done:
  LAYOUT_TIMING_DONE(get_node_block);
  return(status);
}

/*
 * Caller holds partition write lock.
 *
 * Undo the partition accounting done by
 * nasd_od_layout_get_node_block() for a node whose creation failed:
 * one used block and prealloc_blocks + 1 allocated blocks are
 * credited back, then the layout's nl_node_fail_create routine is
 * given the two extents. Its status is returned.
 */
nasd_status_t
nasd_od_layout_node_fail_create(
  int                     partnum,
  nasd_blkcnt_t           prealloc_blocks,
  nasd_odc_exlist_ent_t  *exle,
  nasd_odc_exlist_ent_t  *pre_exle)
{
  nasd_odc_icpart_t *icp;
  nasd_od_part_t *part;
  nasd_status_t status;
  LAYOUT_TIMING_DECL_GO

  part = &PART(partnum);
  /* NOTE(review): icp is never read in the visible code */
  icp = &nasd_odc_state->parts[partnum];

  /* counters are adjusted before the layout is called */
  part->blocks_used -= 1;
  part->blocks_allocated -= (prealloc_blocks + 1);

  nasd_part_modified(partnum);

  status = nasd_nl_cur->nl_node_fail_create(partnum, exle, pre_exle);

  LAYOUT_TIMING_DONE(node_fail_create);
  return(status);
}

/*
 * Caller holds partition write lock.
 * Block was "used".
 *
 * Release a single block: the partition's used and allocated counts
 * each drop by one, then the layout's nl_release_oneblock routine is
 * called and its status returned.
 */
nasd_status_t
nasd_od_layout_release_oneblock(
  int            partnum,
  nasd_blkno_t   blknum,
  void          *layout_handle)
{
  nasd_odc_icpart_t *icp;
  nasd_od_part_t *part;
  nasd_status_t status;
  LAYOUT_TIMING_DECL_GO

  part = &PART(partnum);
  /* NOTE(review): icp is never read in the visible code */
  icp = &nasd_odc_state->parts[partnum];

  /* counters are adjusted before the layout is called */
  part->blocks_used -= 1;
  part->blocks_allocated -= 1;

  nasd_part_modified(partnum);

  status = nasd_nl_cur->nl_release_oneblock(partnum, blknum,
    layout_handle);

  LAYOUT_TIMING_DONE(release_oneblock);
  return(status);
}

/*
 * Caller holds partition write lock.
 * Blocks were "used"
 *
 * Release the blocks described by exle through the layout's
 * nl_release_blocks routine. If that succeeds, the count of blocks
 * it reports is subtracted from both the partition's allocated and
 * used totals. If it fails, its status is returned and the partition
 * counters are left alone.
 */
nasd_status_t
nasd_od_layout_release_blocks(
  int                     partnum,
  nasd_odc_exlist_ent_t  *exle,
  void                   *layout_handle)
{
  nasd_blkcnt_t nreleased;
  nasd_odc_icpart_t *icp;
  nasd_od_part_t *part;
  nasd_status_t status;
  LAYOUT_TIMING_DECL_GO

  part = &PART(partnum);
  /* NOTE(review): icp is never read in the visible code */
  icp = &nasd_odc_state->parts[partnum];

  status = nasd_nl_cur->nl_release_blocks(partnum, exle, &nreleased,
    layout_handle);
  if (!status) {
    part->blocks_allocated -= nreleased;
    part->blocks_used -= nreleased;

    nasd_part_modified(partnum);

    status = NASD_SUCCESS;
  }

  LAYOUT_TIMING_DONE(release_blocks);
  return(status);
}

/*
 * Call with node write lock held.
 * Caller does not hold partition locks.
 *
 * First phase of changing an object's preallocation to
 * prealloc_blocks: adjust the partition and node counts to show the
 * new preallocation, and record the request in pah.
 *
 * pah->prealloc_blocks is set to the requested value and pah->pb_need
 * to the change relative to the node's current preallocation (0 when
 * nothing changes). The partition write lock is taken here while the
 * counts are updated.
 *
 * Returns NASD_NO_SPACE (counts untouched, pb_need still set) if the
 * partition cannot cover the increase, NASD_SUCCESS otherwise.
 */
nasd_status_t
nasd_od_layout_init_adj_prealloc(
  int                              partnum,
  nasd_odc_ent_t                  *ne,
  nasd_blkcnt_t                    prealloc_blocks,
  nasd_odc_prealloc_adj_handle_t  *pah)
{
  nasd_odc_icpart_t *icp;
  nasd_blkcnt_t pb_avail;
  nasd_od_part_t *part;
  nasd_od_node_t *np;
  nasd_status_t status;
  LAYOUT_TIMING_DECL_GO

  np = ne->data.node;

  part = &PART(partnum);
  icp = &nasd_odc_state->parts[partnum];

  pah->prealloc_blocks = prealloc_blocks;
  status = NASD_SUCCESS;

  /*
   * Unequal values cannot both be zero, so a plain equality test
   * covers the "either is nonzero and they differ" condition.
   */
  if (prealloc_blocks == np->blocks_preallocated) {
    pah->pb_need = 0;
    goto done;
  }

  pah->pb_need = prealloc_blocks - np->blocks_preallocated;

  NASD_ODC_ICPART_LOCK_WRITE(icp);
  pb_avail = part->part_size - part->blocks_allocated;
  /*
   * NOTE(review): when the preallocation shrinks the difference above
   * is logically negative; confirm the types of pb_need and pb_avail
   * make this comparison behave as intended in that case.
   */
  if (pah->pb_need > pb_avail) {
    status = NASD_NO_SPACE;
  }
  else {
    part->blocks_allocated += pah->pb_need;
    np->blocks_preallocated += pah->pb_need;
    nasd_part_modified(partnum);
  }
  NASD_ODC_ICPART_UNLOCK_WRITE(icp);

done:
  LAYOUT_TIMING_DONE(init_adj_prealloc);
  return(status);
}

/*
 * Caller does not hold partition locks.
 * Call with node write lock held.
 *
 * Cancel the result of an init_adj_prealloc: pah->pb_need is
 * subtracted back out of the partition's allocated count and the
 * node's preallocated count, under the partition write lock. Nothing
 * is touched when pb_need is zero. Always returns NASD_SUCCESS.
 */
nasd_status_t
nasd_od_layout_cancel_adj_prealloc(
  int                              partnum,
  nasd_odc_ent_t                  *ne,
  nasd_odc_prealloc_adj_handle_t  *pah)
{
  nasd_odc_icpart_t *icp;
  nasd_od_part_t *part;
  nasd_od_node_t *np;
  LAYOUT_TIMING_DECL_GO

  np = ne->data.node;

  part = &PART(partnum);
  icp = &nasd_odc_state->parts[partnum];

  if (!pah->pb_need) {
    /* the init phase changed nothing, so there is nothing to undo */
    LAYOUT_TIMING_DONE(cancel_adj_prealloc);
    return(NASD_SUCCESS);
  }

  NASD_ODC_ICPART_LOCK_WRITE(icp);
  part->blocks_allocated -= pah->pb_need;
  np->blocks_preallocated -= pah->pb_need;
  nasd_part_modified(partnum);
  NASD_ODC_ICPART_UNLOCK_WRITE(icp);

  LAYOUT_TIMING_DONE(cancel_adj_prealloc);
  return(NASD_SUCCESS);
}

/*
 * Caller does not hold partition locks.
 *
 * Actually perform adjustment of the preallocated range on an
 * object, by handing the request to the layout's nl_adj_prealloc
 * routine under the partition write lock. The partition is marked
 * modified whether or not the layout reports success; the layout's
 * status is returned.
 */
nasd_status_t
nasd_od_layout_adj_prealloc(
  int                              partnum,
  nasd_odc_ent_t                  *ne,
  nasd_odc_prealloc_adj_handle_t  *pah,
  int                              len_changed)
{
  nasd_odc_icpart_t *icp;
  nasd_od_part_t *part;
  nasd_od_node_t *np;
  nasd_status_t status;
  LAYOUT_TIMING_DECL_GO

  /* NOTE(review): np and part are never read in the visible code */
  np = ne->data.node;

  part = &PART(partnum);
  icp = &nasd_odc_state->parts[partnum];

  /*
   * The layout grows the preallocated range by pah->pb_need, which
   * may be negative (a logical decrease). When the object's length
   * has changed, the blocks are released and the layout decisions
   * are made over again.
   */
  NASD_ODC_ICPART_LOCK_WRITE(icp);
  status = nasd_nl_cur->nl_adj_prealloc(partnum, ne, pah, len_changed);
  nasd_part_modified(partnum);
  NASD_ODC_ICPART_UNLOCK_WRITE(icp);

  LAYOUT_TIMING_DONE(adj_prealloc);
  return(status);
}

/*
 * Caller must serialize node access (either hold the node write
 * lock or, as in the case of object-delete, set object state to
 * disallow other accesses and wait for serial-okay).
 *
 * Tells an object to stop explicitly holding blocks to meet its
 * preallocation. The preallocation amount itself is not adjusted,
 * so this may be performed on any object at any time.
 *
 * Timed pass-through to the layout's nl_surrender_prealloc routine;
 * its status is returned.
 */
nasd_status_t
nasd_od_layout_surrender_prealloc(
  int              partnum,
  nasd_odc_ent_t  *ne)
{
  nasd_status_t status;
  LAYOUT_TIMING_DECL_GO

  status = nasd_nl_cur->nl_surrender_prealloc(partnum, ne);
  LAYOUT_TIMING_DONE(surrender_prealloc);

  return(status);
}

/*
 * Caller does not hold partition locks.
 *
 * Node is being deleted. ne is the cache entry for the node.
 *
 * The entry is first disassociated with nasd_odc_obj_disassoc(); if
 * that fails its status is returned and nothing else happens.
 * Otherwise this waits until the entry is no longer busy and then
 * calls the layout's nl_node_deleting routine, returning its status.
 */
nasd_status_t
nasd_od_layout_node_deleting(
  int              partnum,
  nasd_odc_ent_t  *ne)
{
  nasd_status_t status;
  LAYOUT_TIMING_DECL_GO

  status = nasd_odc_obj_disassoc(ne);
  if (!status) {
    nasd_odc_wait_not_busy(ne);

    status = nasd_nl_cur->nl_node_deleting(partnum, ne);
  }

  LAYOUT_TIMING_DONE(node_deleting);
  return(status);
}

/*
 * Call with partition write lock held.
 * Call with object write lock held.
 *
 * Allocate up to needblks blocks for an object. Returns blocks in
 * "used" state.
 *
 * The request passed to the layout is capped at the number of
 * unallocated blocks left in the partition. On success the
 * partition's allocated and used counts grow by the number of blocks
 * the layout reports, and that number is stored in
 * *blocks_allocated_p. On failure *blocks_allocated_p is set to 0 and
 * the layout's status is returned.
 */
nasd_status_t
nasd_od_layout_alloc_blocks(
  int                      partnum,
  nasd_odc_ent_t          *ne,
  nasd_blkcnt_t            needblks,
  nasd_blkno_t             blk_hint,
  nasd_odc_exlist_ent_t  **exle_p,
  nasd_blkcnt_t           *blocks_allocated_p)
{
  nasd_blkcnt_t nalloc, request;
  nasd_odc_icpart_t *icp;
  nasd_od_part_t *part;
  nasd_status_t status;
  LAYOUT_TIMING_DECL_GO

  part = &PART(partnum);
  /* NOTE(review): icp is never read in the visible code */
  icp = &nasd_odc_state->parts[partnum];

  /* never ask the layout for more than the partition has left */
  request = NASD_MIN(needblks, part->part_size - part->blocks_allocated);

  status = nasd_nl_cur->nl_alloc_blocks(partnum, ne, request, blk_hint,
    exle_p, &nalloc);
  if (status) {
    *blocks_allocated_p = 0;
    goto done;
  }

  part->blocks_allocated += nalloc;
  part->blocks_used += nalloc;
  nasd_part_modified(partnum);

  *blocks_allocated_p = nalloc;
  status = NASD_SUCCESS;

done:
  LAYOUT_TIMING_DONE(alloc_blocks);
  return(status);
}

/*
 * Call with partition write lock held.
 * Call with object write lock held.
 *
 * Consume up to needblks blocks of the node's preallocation.
 *
 * The logical preallocation count is reduced by
 * min(needblks, blocks_preallocated); that many blocks are added to
 * the node's allocated count and the partition's used count. If the
 * node also holds a physical preallocated range (prealloc_ex.first
 * nonzero), the blocks are carved off the front of that range.
 *
 * On success:
 *   *pre_exle_p  single-entry extent list covering the blocks taken
 *                from the preallocated range, or NULL if none were
 *                taken. The entry comes from
 *                nasd_odc_get_extent_list().
 *   *gotp        number of blocks in that extent (0 if NULL).
 *
 * On failure (no extent entry available) the status from
 * nasd_odc_get_extent_list() is returned, the outputs stay NULL / 0,
 * and no counters are changed.
 */
nasd_status_t
nasd_od_layout_get_prealloc(
  int                      partnum,
  nasd_odc_ent_t          *ne,
  nasd_blkcnt_t            needblks,
  nasd_odc_exlist_ent_t  **pre_exle_p,
  nasd_blkcnt_t           *gotp)
{
  nasd_blkcnt_t pbs_got, preallocblks_used, pbs_avail;
  nasd_odc_exlist_ent_t *pre_exle;
  nasd_od_part_t *part;
  nasd_od_node_t *np;
  nasd_status_t rc;
  LAYOUT_TIMING_DECL_GO

  *pre_exle_p = NULL;
  *gotp = 0;

  part = &PART(partnum);

  np = ne->data.node;

  /*
   * needblks is how many blocks we need to allocate
   * preallocblks_used is how many blocks of the logical
   *   preallocation we consume
   * pbs_avail is how many blocks are in preallocated range
   * pbs_got is how many preallocated blocks we used
   */
  pbs_got = 0;
  pre_exle = NULL;

  preallocblks_used = NASD_MIN(needblks, np->blocks_preallocated);

  /*
   * Only build an extent when something is actually consumed; a
   * zero-block request would otherwise yield an inverted range
   * (last == first - 1).
   */
  if (np->prealloc_ex.first && preallocblks_used) {
    pbs_avail = np->prealloc_ex.last - np->prealloc_ex.first + 1;
    NASD_ASSERT(pbs_avail >= preallocblks_used);
    pbs_got = NASD_MIN(preallocblks_used, pbs_avail);
    rc = nasd_odc_get_extent_list(&pre_exle);
    if (rc != NASD_SUCCESS) {
      LAYOUT_TIMING_DONE(get_prealloc);
      return(rc);
    }
    pre_exle->range.first = np->prealloc_ex.first;
    pre_exle->range.last = pre_exle->range.first + pbs_got - 1;
    pre_exle->next = pre_exle->prev = NULL;
    if (pbs_avail == pbs_got) {
      /* consumed entire prealloc range */
      np->prealloc_ex.first = np->prealloc_ex.last = 0;
    }
    else {
      /*
       * Consumed partial prealloc range.
       * Do not preallocate another range, yet- wait until
       * after we do layout stuff below to avoid misordering
       * the object.
       */
      np->prealloc_ex.first = pre_exle->range.last + 1;
    }
  }

  np->blocks_preallocated -= preallocblks_used;
  np->blocks_allocated += preallocblks_used;

  /*
   * NOTE(review): the other routines in this file that change
   * partition counters also call nasd_part_modified(); confirm
   * whether the caller is expected to do that here.
   */
  part->blocks_used += preallocblks_used;

  /*
   * Hand the carved-off extent to the caller. Without this the extent
   * entry obtained above would be unreachable and the caller would
   * never learn which blocks it was given.
   */
  *pre_exle_p = pre_exle;
  *gotp = pbs_got;

  LAYOUT_TIMING_DONE(get_prealloc);
  return(NASD_SUCCESS);
}

/*
 * Called at format time only.
 * Caller does not hold partition locks.
 *
 * Perform any actions required on format, such as setting aside
 * blocks for permanent storage. Pass-through to the current layout's
 * nl_format routine; its status is returned.
 */
nasd_status_t
nasd_od_layout_format(
  nasd_od_config_t  *config,
  nasd_blkno_t       first_real_data_blk)
{
  return(nasd_nl_cur->nl_format(config, first_real_data_blk));
}

/*
 * Called at startup time, once disk state has been loaded, to
 * initialize any layout state that requires disk state.
 * Pass-through to the current layout's nl_init_dynamic routine; its
 * status is returned.
 */
nasd_status_t
nasd_od_layout_init_dynamic(
  nasd_blkno_t       first_real_data_blk)
{
  return(nasd_nl_cur->nl_init_dynamic(first_real_data_blk));
}

/* Local Variables:  */
/* indent-tabs-mode: nil */
/* tab-width: 2 */
/* End: */
