#delim ;
prog def intext, rclass;
version 13.0;
/*
 Input a text file into a Stata data set in memory
 containing as many string variables as necessary,
 each containing a portion of the record
 with a user-specified maximum length.
*! Author: Roger Newson
*! Date: 31 March 2017
*/

syntax using , Generate(string) [ LEngth(integer 80) CLEAR ];
/*
  generate() is the prefix for generated string variables.
  length()is maximum length of string variables.
  clear() indicates that any existing data set in memory may be cleared.
*/

*
 Check that length() is legal
*;
local maxlength=c(maxstrlvarlen);
if `length'<=0 {;
  disp as error "Nonpositive length(`length') not allowed";
  error 498;
};
if `length'>`maxlength' {;
  disp as error "length(`length') greater than the maximum of `maxlength'";
  error 498;
};

*
 Measure number of records in input file and maximum record length
*;
tempname intf;
tempname curbyte;
file open `intf' `using', binary read;
local nobs=0;
local mrecl=0;
file read `intf' %1bu `curbyte';
while r(eof)==0 {;
  local nobs=`nobs'+1;
  local recl=0;
  while !inlist(`curbyte',10,13,.) {;
    local recl=`recl'+1;
    file read `intf' %1bu `curbyte';
  };
  if `recl'>`mrecl' {;
    local mrecl=`recl';
  };
  if `curbyte'==13 {;
    file read `intf' %1bu `curbyte';
  };
  if `curbyte'==10 {;
    file read `intf' %1bu `curbyte';
  };

};
file close `intf';
if `mrecl'==0 {;local nsect=1;};
else {;local nsect=int((`mrecl'-1)/`length')+1;};

disp as text "Number of records: " as result `nobs'
 _n as text "Maximum record length: " as result `mrecl'
 _n as text "Number of string variables of length `length' required as record sections: " as result `nsect';
 
preserve;

if "`clear'"!="" {;drop _all;lab drop _all;};

*
 Create initially empty generated string variables
*;
local newobs=max(_N,`nobs');
qui set obs `newobs';
forv i1=1(1)`nsect' {;qui gene str1 `generate'`i1'="";};

* Assign values to generated string variables *;
file open `intf' `using',binary read;
local obs=0;
file read `intf' %1bu `curbyte';
while r(eof)==0 {;
  local obs=`obs'+1;
  local recl=0;
  while !inlist(`curbyte',10,13,.) {;
    local recl=`recl'+1;
    local varseq=int((`recl'-1)/`length')+1;
    qui replace `generate'`varseq'=`generate'`varseq'+char(`curbyte') in `obs';
    file read `intf' %1bu `curbyte';
  };
  if `curbyte'==13 {;
    file read `intf' %1bu `curbyte';
  };
  if `curbyte'==10 {;
    file read `intf' %1bu `curbyte';
  };
};
file close `intf';

* Labels and formats for variables *;
forv i1=1(1)`nsect' {;
  local typei1:type `generate'`i1';
  local leni1=subinstr("`typei1'","str","",1);
  if `"`leni1'"'=="L" {;
    format `generate'`i1' %-`=c(maxstrvarlen)'s;
  };
  else {;
    format `generate'`i1' %-`leni1's;
  };
  lab var `generate'`i1' "Section `i1'";
};

restore,not;

* Return results *;
return scalar nobs=`nobs';
return scalar mrecl=`mrecl';
return scalar nsect=`nsect';

end;